Browse Source

新版任务新建流程

maxiaoshan 3 years ago
parent
commit
d153bb3ee1
17 changed files with 2617 additions and 2109 deletions
  1. 0 726
      src/code.go
  2. 77 5
      src/config.json
  3. 0 188
      src/downloadnum.go
  4. 453 0
      src/logs/task.log
  5. 178 0
      src/luatask/downloadnum.go
  6. 1335 0
      src/luatask/task.go
  7. 23 69
      src/main.go
  8. 0 1088
      src/task.go
  9. 154 0
      src/timetask/random.go
  10. 11 10
      src/timetask/summary.go
  11. 150 0
      src/timetask/wxworkwarn.go
  12. 12 4
      src/user.json
  13. 76 0
      src/util/config.go
  14. 136 0
      src/util/msgservice.go
  15. 1 7
      src/util/util.go
  16. 8 9
      src/util/work.go
  17. 3 3
      src/worktime.json

+ 0 - 726
src/code.go

@@ -1,726 +0,0 @@
-package main
-
-import (
-	"fmt"
-	"math"
-	qu "qfw/util"
-	"sort"
-	"sync"
-	"time"
-)
-
-var (
-	YearMinCodeMap      map[string]bool                               //luayearmincode中,爬虫代码:循环周期
-	SendFirstMap        map[string]*Lua                               //
-	YearMinDownloadNum  int                                           //一年下载最低值
-	IntervalMaxNum      int                                           //区间最大值
-	PublishtimeInterval = []float64{1.0, 3.0, 10.0, 20.0, 31.0, 93.0} //[0,1),[1,3),[3,10),[10,20),[20,31),[31,31*3),[31*3,···)
-	IntervalMap         = map[int]string{
-		1: "[0,1)",
-		2: "[1,3)",
-		3: "[3,10)",
-		4: "[10,20)",
-		5: "[20,31)",
-		6: "[31,93)",
-		7: "[93,···)",
-	}
-	IntervalRotateTime = map[string]int{ //区间爬虫一轮次时间(月)
-		"[0,1)":    3,
-		"[1,3)":    3,
-		"[3,10)":   6,
-		"[10,20)":  6,
-		"[20,31)":  6,
-		"[31,93)":  12,
-		"[93,···)": 12,
-	}
-)
-
-type Lua struct {
-	Site     string
-	Channel  string
-	Modify   string
-	Modifyid string
-	Code     string
-	Event    int
-	Count    int
-}
-
-func LuaYearMinCodeCreateTask() {
-	defer qu.Catch()
-	GetAllLuaYearMinCode() //获取luayearmincode所有爬虫
-	CreateTask()           //
-}
-
-func GetAllLuaYearMinCode() {
-	defer qu.Catch()
-	YearMinCodeMap = map[string]bool{}
-	SendFirstMap = map[string]*Lua{}
-	list, _ := MgoE.Find("luayearmincode", nil, nil, `{"publishtime":0}`, false, -1, -1)
-	for _, l := range *list {
-		code := qu.ObjToString(l["code"])
-		YearMinCodeMap[code] = true
-		sf, _ := l["sendfirst"].(bool)
-		sd, _ := l["send"].(bool)
-		if sf && !sd {
-			lua := &Lua{
-				Site:     qu.ObjToString(l["site"]),
-				Channel:  qu.ObjToString(l["channel"]),
-				Modify:   qu.ObjToString(l["modify"]),
-				Modifyid: qu.ObjToString(l["modifyid"]),
-				Code:     code,
-				Count:    qu.IntAll(l["count"]),
-				Event:    qu.IntAll(l["event"]),
-			}
-			SendFirstMap[code] = lua
-		}
-	}
-}
-
-func CreateTask() {
-	defer qu.Catch()
-	//1.sendfirst建任务(只建一次该任务)
-	CreateFirstCodeTask()
-	//2.根据区间轮循建任务
-	list, _ := MgoE.Find("luayearmincodeinterval", nil, nil, nil, false, -1, -1)
-	for _, l := range *list {
-		CreateTaskByInterval(l)
-	}
-
-}
-
-//根据区间建任务
-func CreateTaskByInterval(l map[string]interface{}) {
-	defer qu.Catch()
-	interval := qu.ObjToString(l["interval"])
-	qu.Debug(interval, "区间开始创建任务...")
-	timesnum := qu.IntAll(l["timesnum"])
-	cycletime := qu.IntAll(l["cycletime"])
-	ct_wg := &sync.WaitGroup{}
-	ct_lock := &sync.Mutex{}
-	ct_ch := make(chan bool, 3)
-	savetaskArr := []map[string]interface{}{}
-	updateArr := [][]map[string]interface{}{}
-	list, _ := MgoE.Find("luayearmincode", `{"interval":"`+interval+`","send":false}`, ``, `{"publishtime":0}`, false, 0, timesnum)
-	for _, l := range *list {
-		ct_wg.Add(1)
-		ct_ch <- true
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-ct_ch
-				ct_wg.Done()
-			}()
-			update := []map[string]interface{}{ //更新
-				map[string]interface{}{"_id": tmp["_id"]},
-				map[string]interface{}{
-					"$set": map[string]interface{}{
-						"send": true,
-					},
-				},
-			}
-			code := qu.ObjToString(tmp["code"])
-			description := ""
-			state := 0 //任务状态
-			/*
-				统计是否有已下几种情况,时间定为一周内数据:
-				1、统计spider_highlistdata是否有下载异常数据
-				2、统计spider_warn异常数据(发布时间异常、乱码)
-				3、统计spider_sitecheck 站点异常爬虫(404)
-			*/
-			stime, etime := GetTime(-cycletime), GetTime(0)
-			//统计周期内下载量
-			query := map[string]interface{}{
-				"spidercode": code,
-				"l_np_publishtime": map[string]interface{}{
-					"$gte": stime,
-					"$lte": etime,
-				},
-			}
-			downloadnum := MgoS.Count("data_bak", query)
-			//1、下载异常
-			query = map[string]interface{}{
-				"comeintime": map[string]interface{}{
-					"$gte": stime,
-					"$lte": etime,
-				},
-				"state":      -1,
-				"spidercode": code,
-			}
-			data_downloaderr, _ := MgoS.Find("spider_highlistdata", query, `{"_id":-1}`, `{"href":1}`, false, 0, 10)
-			if data_downloaderr != nil && len(*data_downloaderr) > 0 {
-				if len(*data_downloaderr) == 10 {
-					state = 1
-				}
-				description += "下载异常:\n"
-				for _, derr := range *data_downloaderr {
-					description += qu.ObjToString(derr["href"]) + "\n"
-				}
-			}
-			//2、发布时间异常、乱码
-			query = map[string]interface{}{
-				"comeintime": map[string]interface{}{
-					"$gte": stime,
-					"$lte": etime,
-				},
-				"level": 2, //2:error数据 1:warn数据
-				"code":  code,
-			}
-			data_warn, _ := MgoS.Find("spider_warn", query, `{"_id":-1}`, `{"href":1,"field":1}`, false, 0, 10)
-			if data_warn != nil && len(*data_warn) > 0 {
-				destmp_publishtime := "发布时间异常:\n"
-				destmp_code := "正文标题异常:\n"
-				for _, dw := range *data_warn {
-					field := qu.ObjToString(dw["field"])
-					if field == "publishtime" {
-						state = 1
-						destmp_publishtime += qu.ObjToString(dw["href"]) + "\n"
-					} else {
-						destmp_code += qu.ObjToString(dw["href"]) + "\n"
-					}
-				}
-				description += destmp_code
-				description += destmp_publishtime
-			}
-			//3、404
-			query = map[string]interface{}{
-				"comeintime": map[string]interface{}{
-					"$gte": stime,
-					"$lte": etime,
-				},
-				"statuscode": 404,
-				"code":       code,
-			}
-			data_404, _ := MgoS.FindOne("spider_sitecheck", query)
-			if data_404 != nil && len(*data_404) > 0 {
-				if downloadnum == 0 { //有采集数据,不认为是404
-					state = 1
-					description += "网站监测:404\n" + qu.ObjToString((*data_404)["url"]) + "\n"
-				}
-			}
-			result := map[string]interface{}{}
-			result["s_code"] = code
-			result["s_site"] = tmp["site"]
-			result["s_channel"] = tmp["channel"]
-			result["s_descript"] = description
-			result["l_comeintime"] = time.Now().Unix()
-			result["l_complete"] = time.Now().AddDate(0, 0, cycletime).Unix()
-			result["s_modifyid"] = tmp["modifyid"]
-			result["s_modify"] = tmp["modify"]
-			result["i_event"] = tmp["event"]
-			result["s_source"] = "程序"
-			result["i_num"] = downloadnum
-			result["i_min"] = 0
-			result["i_state"] = state
-			result["s_type"] = "7"
-			result["s_urgency"] = "1"
-			result["i_times"] = 0
-			result["s_downloadtime"] = qu.FormatDateByInt64(&stime, qu.Date_Full_Layout) + "/" + qu.FormatDateByInt64(&etime, qu.Date_Full_Layout)
-			ct_lock.Lock()
-			savetaskArr = append(savetaskArr, result)
-			updateArr = append(updateArr, update)
-			ct_lock.Unlock()
-		}(l)
-	}
-	ct_wg.Wait()
-	ct_lock.Lock()
-	if len(savetaskArr) > 0 {
-		MgoE.SaveBulk("task", savetaskArr...)
-		savetaskArr = []map[string]interface{}{}
-	}
-	if len(updateArr) > 0 {
-		MgoE.UpdateBulk("luayearmincode", updateArr...)
-		updateArr = [][]map[string]interface{}{}
-	}
-	ct_lock.Unlock()
-	//time.AfterFunc(time.Duration(cycletime)*time.Second, func() { CreateTaskByInterval(l) })
-	time.AfterFunc(time.Duration(cycletime*24)*time.Hour, func() { CreateTaskByInterval(l) })
-}
-
-//历史数据采集为0的建任务
-func CreateFirstCodeTask() {
-	defer qu.Catch()
-	qu.Debug("开始创建sendfirst任务...")
-	stime := time.Now().AddDate(-1, 0, 0).Unix()
-	etime := GetTime(0)
-	cl_wg := &sync.WaitGroup{}
-	cl_lock := &sync.Mutex{}
-	cl_ch := make(chan bool, 3)
-	savetaskArr := []map[string]interface{}{}
-	updateArr := [][]map[string]interface{}{}
-	for _, lua := range SendFirstMap {
-		cl_wg.Add(1)
-		cl_ch <- true
-		go func(l *Lua) {
-			defer func() {
-				<-cl_ch
-				cl_wg.Done()
-			}()
-			update := []map[string]interface{}{ //更新
-				map[string]interface{}{"code": l.Code},
-				map[string]interface{}{
-					"$set": map[string]interface{}{
-						"send": true,
-					},
-				},
-			}
-			result := map[string]interface{}{}
-			result["s_code"] = l.Code
-			result["s_site"] = l.Site
-			result["s_channel"] = l.Channel
-			result["s_descript"] = "下载量异常:\n一年内数据下载量:" + fmt.Sprint(l.Count)
-			result["l_comeintime"] = time.Now().Unix()
-			result["l_complete"] = time.Now().AddDate(1, 0, 0).Unix()
-			result["s_modifyid"] = l.Modifyid
-			result["s_modify"] = l.Modify
-			result["i_event"] = l.Event
-			result["s_source"] = "程序"
-			result["i_num"] = l.Count
-			result["i_min"] = 0
-			result["i_state"] = 0
-			result["s_type"] = "10"
-			result["s_urgency"] = "1"
-			result["i_times"] = 0
-			result["s_downloadtime"] = qu.FormatDateByInt64(&stime, qu.Date_Full_Layout) + "/" + qu.FormatDateByInt64(&etime, qu.Date_Full_Layout)
-			cl_lock.Lock()
-			savetaskArr = append(savetaskArr, result)
-			updateArr = append(updateArr, update)
-			if len(savetaskArr) > 500 {
-				MgoE.SaveBulk("task", savetaskArr...)
-				savetaskArr = []map[string]interface{}{}
-			}
-			if len(updateArr) > 500 {
-				MgoE.UpdateBulk("luayearmincode", updateArr...)
-				updateArr = [][]map[string]interface{}{}
-			}
-			cl_lock.Unlock()
-		}(lua)
-	}
-	cl_wg.Wait()
-	cl_lock.Lock()
-	if len(savetaskArr) > 0 {
-		MgoE.SaveBulk("task", savetaskArr...)
-		savetaskArr = []map[string]interface{}{}
-	}
-	if len(updateArr) > 0 {
-		MgoE.UpdateBulk("luayearmincode", updateArr...)
-		updateArr = [][]map[string]interface{}{}
-	}
-	cl_lock.Unlock()
-	SendFirstMap = map[string]*Lua{}
-	qu.Debug("sendfirst任务创建完毕...")
-}
-
-//计算循环周期和每轮新建任务爬虫的个数
-func CycleTime() {
-	defer qu.Catch()
-	for k, interval := range IntervalMap {
-		cycletime := -1
-		if k == 1 { //区间在[0,1),循环周期设置为10天
-			cycletime = 10
-		} else if k == 2 || k == 3 { //confinval最大值都在x以下,可设置为x天
-			list, _ := MgoE.Find("luayearmincode", `{"interval":"`+interval+`"}`, `{"confinval":-1}`, `{"confinval":1}`, false, 0, 1)
-			if list != nil && len(*list) == 1 {
-				cycletime = qu.IntAll((*list)[0]["confinval"])
-			}
-		} else if k == 4 || k == 5 || k == 6 { //最大值90%都在x以下,可设置为x天
-			percent := 0.9
-			if k == 6 {
-				percent = 0.5
-			}
-			count := MgoE.Count("luayearmincode", `{"interval":"`+interval+`"}`)
-			index := int(math.Floor(float64(count) * percent))
-			list, _ := MgoE.Find("luayearmincode", `{"interval":"`+interval+`"}`, `{"confinval":1}`, `{"confinval":1}`, false, 0, index+1)
-			if list != nil && len(*list) == index+1 {
-				cycletime = qu.IntAll((*list)[index]["confinval"])
-			}
-		} else if k == 7 {
-			cycletime = 180
-		}
-		updata := map[string]interface{}{
-			"$set": map[string]interface{}{
-				"cycletime": cycletime,
-				"send":      false,
-			},
-		}
-		MgoE.Update("luayearmincode", `{"interval":"`+interval+`"}`, updata, false, true)
-		q := map[string]interface{}{
-			"interval": interval,
-			"sendfirst": map[string]interface{}{
-				"$exists": false,
-			},
-		}
-		count := MgoE.Count("luayearmincode", q)
-		t := float64((count * cycletime)) / float64((30 * IntervalRotateTime[interval]))
-		rotateNum := math.Ceil(t)
-		text := interval + ",总数:" + fmt.Sprint(count) + "," + fmt.Sprint(30*IntervalRotateTime[interval]) + "天发送完毕。每" + fmt.Sprint(cycletime) + "天轮循一次,一次发送" + fmt.Sprint(rotateNum) + "条"
-		qu.Debug(text)
-		MgoE.Save("luayearmincodeinterval", map[string]interface{}{"interval": interval, "timesnum": int(rotateNum), "cycletime": cycletime, "text": text})
-	}
-}
-
-//标记数据
-func TagCode() {
-	defer qu.Catch()
-	sess := MgoE.GetMgoConn()
-	defer MgoE.DestoryMongoConn(sess)
-	ch := make(chan bool, 3)
-	wg := &sync.WaitGroup{}
-	lock := &sync.Mutex{}
-	arr := [][]map[string]interface{}{}
-	it := sess.DB("editor").C("luayearmincode").Find(nil).Iter()
-	n := 0
-	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
-		ch <- true
-		wg.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-ch
-				wg.Done()
-			}()
-			update := []map[string]interface{}{}
-			update = append(update, map[string]interface{}{"_id": tmp["_id"]})
-			set := map[string]interface{}{}
-			//code := qu.ObjToString(tmp["code"])
-			count := qu.IntAll(tmp["count"])
-			if count == 1 || count == 0 { //爬虫下载量为1,放入第7区间
-				set["interval"] = IntervalMap[7]
-				if count == 0 {
-					set["sendfirst"] = true
-				}
-			} else {
-				var tmpArr Int64Slice
-				for _, tp := range tmp["publishtime"].([]interface{}) {
-					tmpArr = append(tmpArr, tp.(int64))
-				}
-				sort.Sort(tmpArr) //发布时间排序
-				//
-				intervalNumArr := map[int][]float64{} //记录每个区间发布时间间隔信息
-				for i, p := range tmpArr {
-					if i == 0 {
-						continue
-					}
-					dval := float64(p-tmpArr[i-1]) / 86400
-					//计算区间
-					intervalNum := -1                        //区间
-					for j, pi := range PublishtimeInterval { //1.0, 3.0, 10.0, 20.0, 31.0, 93.0
-						if dval == pi {
-							intervalNum = j + 2
-							break
-						} else if dval < pi {
-							intervalNum = j + 1
-							break
-						}
-					}
-					if intervalNum == -1 { //如果为初始值,证明dval大于93
-						intervalNum = 7
-					}
-					intervalNumArr[intervalNum] = append(intervalNumArr[intervalNum], dval)
-				}
-				//
-				maxIn := 0    //记录最大区间
-				maxInLen := 0 //记录最大区间长度
-				flag := true  //记录是否只有第一区间有值
-				for in := 1; in <= 7; in++ {
-					lens := len(intervalNumArr[in])
-					if (in == 1 && lens == 0) || (in != 1 && lens > 0) {
-						flag = false
-					}
-					if in != 1 && lens >= maxInLen {
-						maxInLen = lens
-						maxIn = in
-					}
-				}
-				//qu.Debug(flag, "最大区间:", maxIn, "最大区间长度:", maxInLen)
-				if flag { //只有第一区间有值
-					if count < IntervalMaxNum { //划分到第七区间,直接新建任务
-						set["sendfirst"] = true
-						set["interval"] = IntervalMap[7]
-					} else {
-						set["interval"] = IntervalMap[1]
-					}
-				} else if maxIn != 0 && maxInLen != 0 {
-					sumInval := float64(0)
-					for _, inval := range intervalNumArr[maxIn] {
-						sumInval += inval
-					}
-					mean := sumInval / float64(maxInLen)
-					se := mean / math.Pow(float64(maxInLen), 0.5)
-					confInval := math.Ceil(mean + se*2.32)
-					set["confinval"] = int(confInval) //置信区间
-					set["interval"] = IntervalMap[maxIn]
-				} else {
-					qu.Debug("错误数据id:", tmp["_id"])
-				}
-			}
-			if len(set) > 0 {
-				update = append(update, map[string]interface{}{"$set": set})
-			}
-			lock.Lock()
-			if len(update) == 2 {
-				arr = append(arr, update)
-			}
-			if len(arr) >= 500 {
-				tmps := arr
-				MgoE.UpdateBulk("luayearmincode", tmps...)
-				arr = [][]map[string]interface{}{}
-			}
-			lock.Unlock()
-		}(tmp)
-		if n%1000 == 0 {
-			qu.Debug("current:", n)
-		}
-		tmp = map[string]interface{}{}
-	}
-	wg.Wait()
-	if len(arr) > 0 {
-		MgoE.UpdateBulk("luayearmincode", arr...)
-		arr = [][]map[string]interface{}{}
-	}
-	qu.Debug("标记完成")
-}
-
-//统计爬虫下载量
-func GetSpidercode() {
-	defer qu.Catch()
-	query := map[string]interface{}{
-		"$or": []interface{}{
-			map[string]interface{}{"state": 5},
-			map[string]interface{}{
-				"state": map[string]interface{}{
-					"$in": []int{0, 1, 2},
-				},
-				"event": map[string]interface{}{
-					"$ne": 7000,
-				},
-			},
-		},
-	}
-	codeMap := map[string]*Lua{}
-	luas, _ := MgoE.Find("luaconfig", query, nil, `{"code":1,"event":1,"param_common":1,"createuser":1,"createuserid":1}`, false, -1, -1)
-	for _, l := range *luas {
-		pc := l["param_common"].([]interface{})
-		lua := &Lua{
-			Modify:   qu.ObjToString(l["createuser"]),
-			Modifyid: qu.ObjToString(l["createuserid"]),
-			Event:    qu.IntAll(l["event"]),
-		}
-		if len(pc) > 2 {
-			lua.Site = qu.ObjToString(pc[1])
-			lua.Channel = qu.ObjToString(pc[2])
-		}
-		code := qu.ObjToString(l["code"])
-		codeMap[code] = lua
-	}
-	qu.Debug("开始统计...", len(codeMap))
-	sess := MgoS.GetMgoConn()
-	defer MgoS.DestoryMongoConn(sess)
-	q := map[string]interface{}{
-		"publishtime": map[string]interface{}{
-			"$gte": time.Now().AddDate(-1, 0, 0).Unix(),
-			"$lte": time.Now().Unix(),
-		},
-	}
-	f := map[string]interface{}{
-		"spidercode":  1,
-		"publishtime": 1,
-	}
-	ch := make(chan bool, 5)
-	wg := &sync.WaitGroup{}
-	lock := &sync.Mutex{}
-	codeNum := map[string]int{}
-	codePublishtime := map[string][]int64{}
-	i := 0
-	it1 := sess.DB("spider").C("data_bak").Find(&q).Select(&f).Iter()
-	for tmp := make(map[string]interface{}); it1.Next(&tmp); i++ {
-		wg.Add(1)
-		ch <- true
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-ch
-				wg.Done()
-			}()
-			publishtime := qu.Int64All(tmp["publishtime"])
-			if publishtime > 0 {
-				spidercode := qu.ObjToString(tmp["spidercode"])
-				if codeMap[spidercode] != nil {
-					lock.Lock()
-					codeNum[spidercode] += 1
-					if codeNum[spidercode] > YearMinDownloadNum {
-						lock.Unlock()
-						return
-					}
-					codePublishtime[spidercode] = append(codePublishtime[spidercode], publishtime)
-					lock.Unlock()
-				}
-			}
-		}(tmp)
-		if i%1000 == 0 {
-			qu.Debug(i)
-		}
-		tmp = map[string]interface{}{}
-	}
-	qu.Debug("data_bak查询完毕", len(codeNum))
-	i = 0
-	it2 := sess.DB("spider").C("data_bak_202011030854").Find(&q).Select(&f).Iter()
-	for tmp := make(map[string]interface{}); it2.Next(&tmp); i++ {
-		wg.Add(1)
-		ch <- true
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-ch
-				wg.Done()
-			}()
-			publishtime := qu.Int64All(tmp["publishtime"])
-			if publishtime > 0 {
-				spidercode := qu.ObjToString(tmp["spidercode"])
-				if codeMap[spidercode] != nil {
-					lock.Lock()
-					codeNum[spidercode] += 1
-					if codeNum[spidercode] > YearMinDownloadNum {
-						lock.Unlock()
-						return
-					}
-					codePublishtime[spidercode] = append(codePublishtime[spidercode], publishtime)
-					lock.Unlock()
-				}
-			}
-		}(tmp)
-		if i%1000 == 0 {
-			qu.Debug(i)
-		}
-		tmp = map[string]interface{}{}
-	}
-	wg.Wait()
-	qu.Debug("data_bak_202011030854查询完毕", len(codeNum))
-	for code, num := range codeNum {
-		lua := codeMap[code]
-		delete(codeMap, code)
-		if num <= YearMinDownloadNum {
-			parr := codePublishtime[code]
-			//sort.Sort(parr)
-			MgoE.Save("luayearmincode", map[string]interface{}{"code": code, "count": num, "publishtime": parr, "event": lua.Event, "site": lua.Site, "channel": lua.Channel, "modify": lua.Modify, "modifyid": lua.Modifyid})
-		}
-	}
-	for code, lua := range codeMap { //下载量为0
-		MgoE.Save("luayearmincode", map[string]interface{}{"code": code, "count": 0, "event": lua.Event, "site": lua.Site, "channel": lua.Channel, "modify": lua.Modify, "modifyid": lua.Modifyid, "publishtime": []int64{}})
-	}
-	qu.Debug("统计完毕...")
-}
-
-//补充信息
-func getlua() {
-	luas, _ := MgoE.Find("luaconfig", nil, nil, `{"code":1,"event":1,"param_common":1,"createuser":1,"createuserid":1}`, false, -1, -1)
-	for i, l := range *luas {
-		qu.Debug(i)
-		pc := l["param_common"].([]interface{})
-		Site := ""
-		Channel := ""
-		if len(pc) > 2 {
-			Site = qu.ObjToString(pc[1])
-			Channel = qu.ObjToString(pc[2])
-		}
-		Modify := qu.ObjToString(l["createuser"])
-		Modifyid := qu.ObjToString(l["createuserid"])
-		code := qu.ObjToString(l["code"])
-		MgoE.Update("luayearmincode", `{"code":"`+code+`"}`, map[string]interface{}{"$set": map[string]interface{}{"site": Site, "channel": Channel, "modify": Modify, "modifyid": Modifyid}}, false, false)
-	}
-}
-
-//分组查询
-func GetSpidercode_back() {
-	defer qu.Catch()
-	qu.Debug("开始统计...")
-	sess := MgoS.GetMgoConn()
-	defer MgoS.DestoryMongoConn(sess)
-	q := map[string]interface{}{
-		"publishtime": map[string]interface{}{
-			"$gte": time.Now().AddDate(-1, 0, 0).Unix(),
-			"$lte": time.Now().Unix(),
-		},
-	}
-	g := map[string]interface{}{
-		"_id":   "$spidercode",
-		"count": map[string]interface{}{"$sum": 1},
-	}
-	pro := map[string]interface{}{
-		"spidercode": 1,
-	}
-	s := map[string]interface{}{
-		"count": 1,
-	}
-	p := []map[string]interface{}{
-		map[string]interface{}{"$match": q},
-		map[string]interface{}{"$project": pro},
-		map[string]interface{}{"$group": g},
-		map[string]interface{}{"$sort": s},
-	}
-	it1 := sess.DB("spider").C("data_bak").Pipe(p).Iter()
-	codeCount := map[string]int{}
-	i := 0
-	for tmp := make(map[string]interface{}); it1.Next(&tmp); i++ {
-		code := qu.ObjToString(tmp["_id"])
-		count := qu.IntAll(tmp["count"])
-		qu.Debug(code, count)
-		if count <= YearMinDownloadNum {
-			codeCount[code] = count
-		} else {
-			break
-		}
-		if i%50 == 0 {
-			qu.Debug(i)
-		}
-	}
-	i = 0
-	it2 := sess.DB("spider").C("data_bak_202011030854").Pipe(p).Iter()
-	for tmp := make(map[string]interface{}); it2.Next(&tmp); i++ {
-		code := qu.ObjToString(tmp["_id"])
-		count := qu.IntAll(tmp["count"])
-		qu.Debug(code, count)
-		if count <= YearMinDownloadNum {
-			codeCount[code] += count
-		} else {
-			break
-		}
-		if i%50 == 0 {
-			qu.Debug(i)
-		}
-	}
-	for code, count := range codeCount {
-		if count <= 100 {
-			MgoE.Save("luayearmincode", map[string]interface{}{"code": code, "count": count})
-		}
-	}
-	qu.Debug("统计数量完毕...")
-	list, _ := MgoE.Find("luayearmincode", nil, nil, nil, false, -1, -1)
-	for _, l := range *list {
-		code := qu.ObjToString(l["code"])
-		count := qu.IntAll(l["count"])
-		if count > YearMinDownloadNum {
-			continue
-		}
-		d1s, _ := MgoS.Find("data_bak", `{"spidercode":"`+code+`"}`, nil, `{"publishtime":1}`, false, -1, -1)
-		d2s, _ := MgoS.Find("data_bak_202011030854", `{"spidercode":"`+code+`"}`, nil, `{"publishtime":1}`, false, -1, -1)
-		var publishtimeArr Int64Slice
-		for _, d1 := range *d1s {
-			publishtime := qu.Int64All(d1["publishtime"])
-			if publishtime > 0 {
-				publishtimeArr = append(publishtimeArr, publishtime)
-			}
-		}
-		for _, d2 := range *d2s {
-			publishtime := qu.Int64All(d2["publishtime"])
-			if publishtime > 0 {
-				publishtimeArr = append(publishtimeArr, publishtime)
-			}
-		}
-		sort.Sort(publishtimeArr)
-		MgoE.Update("luayearmincode", map[string]interface{}{"_id": l["_id"]}, map[string]interface{}{"$set": map[string]interface{}{"publishtime": publishtimeArr}}, false, false)
-	}
-	qu.Debug("统计完毕...")
-}
-
-//自定义[]int64数组排序
-type Int64Slice []int64
-
-func (p Int64Slice) Len() int           { return len(p) }
-func (p Int64Slice) Less(i, j int) bool { return p[i] < p[j] }
-func (p Int64Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }

+ 77 - 5
src/config.json

@@ -1,19 +1,91 @@
 {
 	"spider":{
-    		"addr": "192.168.3.207:27092",
+		"addr": "192.168.3.207:27092",
 		"db": "spider",
+		"pythondb": "py_spider",
 		"size": 15
     },
     "editor": {
-    		"addr": "192.168.3.207:27092",
+		"addr": "192.168.3.207:27092",
 		"db": "editor",
 		"size": 15
     },
-    "codeinfocron": "0 5 0 ? * MON-FRI",
+	"pyspider":{
+		"addr": "192.168.3.207:27092",
+		"db": "spider",
+		"size": 5
+	},
+	"msgservers": {
+		"comm": {
+			"addr": "spdata.jianyu360.com:801",
+			"name": "编辑器_队列节点"
+		},
+		"bid": {
+			"addr": "spdata.jianyu360.com:803",
+			"name": "编辑器_并发节点"
+		}
+	},
+	"eventsinfo": {
+		"7000": {
+			"server": "bid",
+			"model": 0,
+			"work": 0
+		},
+		"7100": {
+			"server": "bid",
+			"model": 1,
+			"work": 0
+		},
+		"7110": {
+			"server": "comm",
+			"model": 1,
+			"work": 0
+		},
+		"7200": {
+			"server": "comm",
+			"model": 1,
+			"work": 1
+		},
+		"7210": {
+			"server": "comm",
+			"model": 1,
+			"work": 1
+		},
+		"7300": {
+			"server": "comm",
+			"model": 1,
+			"work": 1
+		},
+		"7310": {
+			"server": "comm",
+			"model": 1,
+			"work": 1
+		},
+		"7400": {
+			"server": "bid",
+			"model": 1,
+			"work": 0
+		},
+		"7410": {
+			"server": "bid",
+			"model": 0,
+			"work": 0
+		},
+		"7500": {
+			"server": "comm",
+			"model": 0,
+			"work": 1
+		},
+		"7700": {
+			"server": "comm",
+			"model": 0,
+			"work": 1
+		}
+	},
     "startaskcron": "0 0 1 ? * MON-FRI",
-    "updatestatecron": "0 0 1 ? * SAT,SUN",
 	"codesummarycron": "0 30 8 ? * *",
-	"randomdatapushcron": "0 50 8 ? * *",
+	"randomdatapushcron": "0 50 8 ? * MON-FRI",
+	"qyworkremindcron": "0 0 9 ? * MON-FRI",
 	"closenum": 2,
 	"daynum": 6
 }

+ 0 - 188
src/downloadnum.go

@@ -1,188 +0,0 @@
-package main
-
-import (
-	"math"
-	qu "qfw/util"
-	"strconv"
-	"strings"
-	"sync"
-	"time"
-
-	"github.com/donnie4w/go-logger/logger"
-)
-
-var (
-	DownloadCheck map[string]*DC
-)
-
-type DC struct {
-	DownRatio float64 //下浮比例
-	UpRatio   float64 //上浮比例
-}
-
-//统计工作日(除周六周日)爬虫每天的下载量,并更新
-func GetDownloadNumber() {
-	defer qu.Catch()
-	logger.Debug("---统计爬虫每日下载量---")
-	defer func() {
-		logger.Debug("---统计爬虫每日下载量完毕---")
-	}()
-	weekDay := time.Now().Weekday().String()
-	if weekDay != "Saturday" && weekDay != "Sunday" { //周二至周五统计前一天的下载量,周一统计上周五的数据
-		yesterday := -1
-		if weekDay == "Monday" {
-			yesterday = -3
-		}
-		timeStr := time.Now().AddDate(0, 0, yesterday).Format("2006-01-02")
-		startTime := GetTime(yesterday)
-		endTime := startTime + 86400
-		spiders := getAllSpider() //获取所有已运行爬虫
-		logger.Debug(timeStr, "上架的爬虫个数:", len(spiders))
-		lock := &sync.Mutex{}
-		wg := &sync.WaitGroup{}
-		ch := make(chan bool, 5)
-		arr := [][]map[string]interface{}{}
-		for code, reps := range spiders {
-			ch <- true
-			wg.Add(1)
-			go func(code string, reps map[string]interface{}) {
-				defer func() {
-					<-ch
-					wg.Done()
-				}()
-				update := []map[string]interface{}{}
-				update = append(update, map[string]interface{}{"code": code})
-				num := MgoS.Count("data_bak", map[string]interface{}{ //统计某个爬虫上个工作日的采集量
-					"spidercode": code,
-					"l_np_publishtime": map[string]interface{}{
-						"$gte": startTime,
-						"$lte": endTime,
-					}})
-				logger.Debug(code, timeStr+"下载量:", num)
-				numStr := strconv.Itoa(num) //数量字符串
-				//将下载量存库
-				data, _ := MgoS.FindOne("spider_download", map[string]interface{}{"code": code})
-				if data != nil && len(*data) > 0 { //已有爬虫下载量信息
-					timeAndNum := qu.ObjArrToStringArr((*data)["timeAndNum"].([]interface{}))
-					if len(timeAndNum) >= 15 { //只统计15天的量,超过15天去除第一个
-						timeAndNum = timeAndNum[1:]
-					}
-					timeAndNum = append(timeAndNum, timeStr+":"+numStr)
-					i := 0
-					y := 0
-					for _, tn := range timeAndNum {
-						nStr := strings.Split(tn, ":")[1]
-						nInt, _ := strconv.Atoi(nStr)
-						if nInt > 0 || y > 0 { //不统计下载量大于0之前的数据
-							y++
-							i += nInt
-						}
-					}
-					avFlush := float64(0)
-					if y > 0 { //不全为0
-						iF, _ := strconv.ParseFloat(strconv.Itoa(i), 64)
-						yF, _ := strconv.ParseFloat(strconv.Itoa(y), 64)
-						avFlush = math.Ceil(iF / yF) //平均值()向上取整
-					}
-					//根据浮动和平均值计算范围值
-					dr, ur := float64(0), float64(0)
-					if DownloadCheck[code] == nil {
-						dr = DownloadCheck["other"].DownRatio
-						ur = DownloadCheck["other"].UpRatio
-					} else {
-						dr = DownloadCheck[code].DownRatio
-						ur = DownloadCheck[code].UpRatio
-					}
-					min := dr * avFlush
-					max := ur * avFlush
-					numerr := float64(num) >= min && float64(num) <= max //超出范围
-					if avFlush == 0 {                                    //平均值0,下载量0,建异常任务
-						numerr = false
-					}
-					update = append(update, map[string]interface{}{
-						"$set": map[string]interface{}{
-							"timeAndNum":      timeAndNum,                   //时间下载量集合
-							"downloadNum":     map[string]int{timeStr: num}, //前一天下载量
-							"averageDownload": avFlush,                      //平均值
-							"isok":            numerr,                       //下载量是否异常
-							"min":             min,                          //采集量下限
-							"max":             max,                          //采集量上限
-							"updatetime":      time.Now().Unix(),            //更新时间
-						},
-					})
-				} else { //新增信息
-					numF, _ := strconv.ParseFloat(strconv.Itoa(num), 64)
-					update = append(update, map[string]interface{}{
-						"$set": map[string]interface{}{
-							"timeAndNum":      []string{timeStr + ":" + numStr},
-							"code":            code,
-							"averageDownload": numF,
-							"downloadNum":     map[string]int{timeStr: num},
-							"isok":            true,
-							"site":            reps["s_site"],
-							"channel":         reps["s_channel"],
-							"comeintime":      time.Now().Unix(),
-						},
-					})
-				}
-				lock.Lock()
-				if len(update) == 2 {
-					arr = append(arr, update)
-				}
-				if len(arr) > 500 {
-					tmps := arr
-					MgoS.UpSertBulk("spider_download", tmps...)
-					arr = [][]map[string]interface{}{}
-				}
-				lock.Unlock()
-			}(code, reps)
-		}
-		wg.Wait()
-		lock.Lock()
-		if len(arr) > 0 {
-			MgoS.UpSertBulk("spider_download", arr...)
-			arr = [][]map[string]interface{}{}
-		}
-		lock.Unlock()
-	} else {
-		return
-	}
-}
-
-//获取所有爬虫
-func getAllSpider() map[string]map[string]interface{} {
-	fields := map[string]interface{}{
-		"code":         1,
-		"param_common": 1,
-	}
-	query := map[string]interface{}{
-		"$or": []interface{}{
-			map[string]interface{}{"state": 5},
-			map[string]interface{}{
-				"state": map[string]interface{}{
-					"$in": []int{0, 1, 2},
-				},
-				"event": map[string]interface{}{
-					"$ne": 7000,
-				},
-			},
-		},
-	}
-	luas, _ := MgoE.Find("luaconfig", query, nil, fields, false, -1, -1)
-	reps := map[string]map[string]interface{}{}
-	for _, lua := range *luas {
-		rep := map[string]interface{}{}
-		code := qu.ObjToString(lua["code"])
-		rep["s_code"] = code
-		if param_common, ok := lua["param_common"].([]interface{}); ok {
-			rep["s_site"] = param_common[1]
-			if len(param_common) > 2 {
-				rep["s_channel"] = param_common[2]
-			} else {
-				rep["s_channel"] = ""
-			}
-		}
-		reps[code] = rep
-	}
-	return reps
-}

File diff suppressed because it is too large
+ 453 - 0
src/logs/task.log


+ 178 - 0
src/luatask/downloadnum.go

@@ -0,0 +1,178 @@
+package luatask
+
+import (
+	qu "qfw/util"
+	"util"
+)
+
+type DC struct {
+	DownRatio float64 //下浮比例
+	UpRatio   float64 //上浮比例
+}
+
+//统计工作日(除周六周日)爬虫每天的下载量,并更新
+// func GetDownloadNumber() {
+// 	defer qu.Catch()
+// 	logger.Debug("---统计爬虫每日下载量---")
+// 	defer func() {
+// 		logger.Debug("---统计爬虫每日下载量完毕---")
+// 	}()
+// 	weekDay := time.Now().Weekday().String()
+// 	if weekDay != "Saturday" && weekDay != "Sunday" { //周二至周五统计前一天的下载量,周一统计上周五的数据
+// 		yesterday := -1
+// 		if weekDay == "Monday" {
+// 			yesterday = -3
+// 		}
+// 		timeStr := time.Now().AddDate(0, 0, yesterday).Format("2006-01-02")
+// 		startTime := util.GetTime(yesterday)
+// 		endTime := startTime + 86400
+// 		spiders := getAllSpider() //获取所有已运行爬虫
+// 		logger.Debug(timeStr, "上架的爬虫个数:", len(spiders))
+// 		lock := &sync.Mutex{}
+// 		wg := &sync.WaitGroup{}
+// 		ch := make(chan bool, 5)
+// 		arr := [][]map[string]interface{}{}
+// 		for code, reps := range spiders {
+// 			ch <- true
+// 			wg.Add(1)
+// 			go func(code string, reps map[string]interface{}) {
+// 				defer func() {
+// 					<-ch
+// 					wg.Done()
+// 				}()
+// 				update := []map[string]interface{}{}
+// 				update = append(update, map[string]interface{}{"code": code})
+// 				num := util.MgoS.Count("data_bak", map[string]interface{}{ //统计某个爬虫上个工作日的采集量
+// 					"spidercode": code,
+// 					"l_np_publishtime": map[string]interface{}{
+// 						"$gte": startTime,
+// 						"$lte": endTime,
+// 					}})
+// 				logger.Debug(code, timeStr+"下载量:", num)
+// 				numStr := strconv.Itoa(num) //数量字符串
+// 				//将下载量存库
+// 				data, _ := util.MgoS.FindOne("spider_download", map[string]interface{}{"code": code})
+// 				if data != nil && len(*data) > 0 { //已有爬虫下载量信息
+// 					timeAndNum := qu.ObjArrToStringArr((*data)["timeAndNum"].([]interface{}))
+// 					if len(timeAndNum) >= 15 { //只统计15天的量,超过15天去除第一个
+// 						timeAndNum = timeAndNum[1:]
+// 					}
+// 					timeAndNum = append(timeAndNum, timeStr+":"+numStr)
+// 					i := 0
+// 					y := 0
+// 					for _, tn := range timeAndNum {
+// 						nStr := strings.Split(tn, ":")[1]
+// 						nInt, _ := strconv.Atoi(nStr)
+// 						if nInt > 0 || y > 0 { //不统计下载量大于0之前的数据
+// 							y++
+// 							i += nInt
+// 						}
+// 					}
+// 					avFlush := float64(0)
+// 					if y > 0 { //不全为0
+// 						iF, _ := strconv.ParseFloat(strconv.Itoa(i), 64)
+// 						yF, _ := strconv.ParseFloat(strconv.Itoa(y), 64)
+// 						avFlush = math.Ceil(iF / yF) //平均值()向上取整
+// 					}
+// 					//根据浮动和平均值计算范围值
+// 					dr, ur := float64(0), float64(0)
+// 					if DownloadCheck[code] == nil {
+// 						dr = DownloadCheck["other"].DownRatio
+// 						ur = DownloadCheck["other"].UpRatio
+// 					} else {
+// 						dr = DownloadCheck[code].DownRatio
+// 						ur = DownloadCheck[code].UpRatio
+// 					}
+// 					min := dr * avFlush
+// 					max := ur * avFlush
+// 					numerr := float64(num) >= min && float64(num) <= max //超出范围
+// 					if avFlush == 0 {                                    //平均值0,下载量0,建异常任务
+// 						numerr = false
+// 					}
+// 					update = append(update, map[string]interface{}{
+// 						"$set": map[string]interface{}{
+// 							"timeAndNum":      timeAndNum,                   //时间下载量集合
+// 							"downloadNum":     map[string]int{timeStr: num}, //前一天下载量
+// 							"averageDownload": avFlush,                      //平均值
+// 							"isok":            numerr,                       //下载量是否异常
+// 							"min":             min,                          //采集量下限
+// 							"max":             max,                          //采集量上限
+// 							"updatetime":      time.Now().Unix(),            //更新时间
+// 						},
+// 					})
+// 				} else { //新增信息
+// 					numF, _ := strconv.ParseFloat(strconv.Itoa(num), 64)
+// 					update = append(update, map[string]interface{}{
+// 						"$set": map[string]interface{}{
+// 							"timeAndNum":      []string{timeStr + ":" + numStr},
+// 							"code":            code,
+// 							"averageDownload": numF,
+// 							"downloadNum":     map[string]int{timeStr: num},
+// 							"isok":            true,
+// 							"site":            reps["s_site"],
+// 							"channel":         reps["s_channel"],
+// 							"comeintime":      time.Now().Unix(),
+// 						},
+// 					})
+// 				}
+// 				lock.Lock()
+// 				if len(update) == 2 {
+// 					arr = append(arr, update)
+// 				}
+// 				if len(arr) > 500 {
+// 					tmps := arr
+// 					MgoS.UpSertBulk("spider_download", tmps...)
+// 					arr = [][]map[string]interface{}{}
+// 				}
+// 				lock.Unlock()
+// 			}(code, reps)
+// 		}
+// 		wg.Wait()
+// 		lock.Lock()
+// 		if len(arr) > 0 {
+// 			MgoS.UpSertBulk("spider_download", arr...)
+// 			arr = [][]map[string]interface{}{}
+// 		}
+// 		lock.Unlock()
+// 	} else {
+// 		return
+// 	}
+// }
+
+//获取所有爬虫
+func getAllSpider() map[string]map[string]interface{} {
+	fields := map[string]interface{}{
+		"code":         1,
+		"param_common": 1,
+	}
+	query := map[string]interface{}{
+		"$or": []interface{}{
+			map[string]interface{}{"state": 5},
+			map[string]interface{}{
+				"state": map[string]interface{}{
+					"$in": []int{0, 1, 2},
+				},
+				"event": map[string]interface{}{
+					"$ne": 7000,
+				},
+			},
+		},
+	}
+	luas, _ := util.MgoE.Find("luaconfig", query, nil, fields, false, -1, -1)
+	reps := map[string]map[string]interface{}{}
+	for _, lua := range *luas {
+		rep := map[string]interface{}{}
+		code := qu.ObjToString(lua["code"])
+		rep["s_code"] = code
+		if param_common, ok := lua["param_common"].([]interface{}); ok {
+			rep["s_site"] = param_common[1]
+			if len(param_common) > 2 {
+				rep["s_channel"] = param_common[2]
+			} else {
+				rep["s_channel"] = ""
+			}
+		}
+		reps[code] = rep
+	}
+	return reps
+}

+ 1335 - 0
src/luatask/task.go

@@ -0,0 +1,1335 @@
+package luatask
+
+import (
+	"encoding/json"
+	"fmt"
+	qu "qfw/util"
+	"sync"
+	"time"
+	"util"
+
+	"github.com/donnie4w/go-logger/logger"
+)
+
+//采集频率异常、列表页异常、404异常、下载异常、运行异常、时间异常、数据异常
+const TASK_RATEERR, TASK_LISTERR, TASK_404ERR, TASK_DOWNLOADERR, TASK_RUNERR, TASK_TIMEERR, TASK_DATAERR = 8, 7, 6, 5, 4, 3, 2
+
+var CodeInfoMap map[string]*Spider
+var StateFeedBackErr = map[int]string{
+	0:   "timeout",
+	200: "analysis",
+	404: "download",
+	500: "server",
+}
+
+var PythonErrTypeInfoMap = map[string]ErrTypeInfo{
+	"download": ErrTypeInfo{
+		ErrType: TASK_404ERR,
+		Remark:  "下载异常",
+	},
+	"server": ErrTypeInfo{
+		ErrType: TASK_DOWNLOADERR,
+		Remark:  "服务异常",
+	},
+	"analysis": ErrTypeInfo{
+		ErrType: TASK_RUNERR,
+		Remark:  "解析异常",
+	},
+	"timeout": ErrTypeInfo{
+		ErrType: TASK_TIMEERR,
+		Remark:  "超时异常",
+	},
+}
+var LuaErrTypeInfoMap = map[string]ErrTypeInfo{
+	"download": ErrTypeInfo{
+		ErrType: TASK_DOWNLOADERR,
+		Remark:  "下载异常",
+	},
+	"regather": ErrTypeInfo{
+		ErrType: TASK_RUNERR,
+		Remark:  "运行异常",
+	},
+	"publishtime": ErrTypeInfo{
+		ErrType: TASK_TIMEERR,
+		Remark:  "时间异常",
+	},
+	"text": ErrTypeInfo{
+		ErrType: TASK_DATAERR,
+		Remark:  "数据异常",
+	},
+}
+
+//spider
+type Spider struct {
+	Site                 string                `json:"site"`                 //站点
+	Platform             string                `json:"platform"`             //平台
+	Code                 string                `json:"spidercode"`           //爬虫
+	Channel              string                `json:"channel"`              //栏目
+	AuditTime            int64                 `json:"audittime"`            //最新审核时间
+	ModifyUser           string                `json:"modifyuser"`           //维护人
+	ModifyId             string                `json:"modifyid"`             //维护人id
+	Event                int                   `json:"event"`                //节点
+	State                int                   `json:"state"`                //状态
+	FrequencyErrTimes    int                   `json:"frequencyerrtimes"`    //爬虫采集频率异常次数
+	MaxPage              int                   `json:"maxpage"`              //采集最大页
+	Model                int                   `json:"model"`                //采集模式(新\老) 0:老模式;1:新模式
+	Working              int                   `json:"working"`              //采集模式(高低\性能)0:高性能模式;1:队列模式
+	ListIsFilter         bool                  `json:"listisfilter"`         //lua列表页采集是否包含过滤
+	DownloadAllNum       int                   `json:"downloadallnum"`       //总下载量
+	DownloadSuccessNum   int                   `json:"downloadsuccessnum"`   //下载成功量
+	DownloadFailedNum    int                   `json:"downloadfailednum"`    //下载失败量
+	NoDownloadNum        int                   `json:"nodownloadnum"`        //未下载量
+	ListDownloadAllTimes int                   `json:"listdownloadalltimes"` //一天内列表页总下载次数
+	ListOhPercentTimes   int                   `json:"listohpercenttimes"`   //列表页采集百分百次数
+	ListNoDataTimes      int                   `json:"listnodatatimes"`      //一天内列表页下载无数据次数
+	Comeintime           int64                 `json:"comeintime"`           //入库时间
+	Error                map[string]*ErrorInfo `json:"error"`
+	//OhPercentTimes    int                   `json:"ohpercentimes"`     //采集量占总下载量100%的次数
+	//NtPercentTime     int                   `json:"ntpercentimes"`     //采集量占总下载量90%-100%的次数
+	//EtPercentTime     int                   `json:"etpercentimes"`     //采集量占总下载量80%-90%的次数
+}
+
+//spider:错误异常
+type ErrorInfo struct {
+	Num int          //错误条数
+	Err []*ErrRemark //错误详情
+}
+
+//spider
+type ErrRemark struct {
+	Href   string //链接
+	Remark string //异常说明
+}
+
+//task
+type Task struct {
+	Platform          string //平台
+	Code              string //爬虫代码
+	Site              string //站点
+	Channel           string //栏目
+	ModifyUser        string //维护人员
+	ModifyId          string //维护人员id
+	ErrType           int    //异常类型:8:采集频率异常;7:列表页异常;5:下载异常;4:运行异常;3:发布时间异常;2:数据异常;1:数据量异常
+	Description       string //描述
+	State             int    //状态
+	Event             int    //节点
+	Num               int    //下载量
+	FrequencyErrTimes int    //爬虫采集频率异常次数
+	DescribeMap       map[int]string
+	//ErrInfo     map[string]map[string]interface{} //异常集合
+}
+
+//task:任务异常类型信息
+type ErrTypeInfo struct {
+	ErrType int    //任务异常类型
+	Remark  string //异常类型说明
+}
+
+var (
+	StartTime   int64                     //上一个工作日的起始时间
+	EndTime     int64                     //上一个工作日的结束时间
+	TaskMap     map[string]*Task          //任务集合
+	UserTaskNum map[string]map[string]int //记录每人每天新建任务量
+	//
+)
+
+func StartTask() {
+	InitInfo() //初始化时间
+	logger.Debug(StartTime, EndTime)
+	PrapareCodeBaseInfo()      //初始化爬虫基本信息
+	GetSpiderListDownloadNum() //统计爬虫列表页下载量、下载失败量、未下载量
+	GetSpiderDownloadRateDataNew()
+	GetSpiderWarnErrData()
+	GetPythonWarnErrData()
+	//SaveCodeInfo()
+	CreateTaskProcess()
+	// GetDownloadNumber() //统计下载量
+	ResetDataState() //更新数据状态
+	//CloseTask()      //关闭任务
+}
+
+//初始化
+func InitInfo() {
+	defer qu.Catch()
+	CodeInfoMap = map[string]*Spider{} //初始化
+	UserTaskNum = map[string]map[string]int{}
+	StartTime, EndTime = util.GetWorkDayTimeUnix()
+	//StartTime = util.GetTime(-1)
+	//EndTime = util.GetTime(0)
+}
+
+// PrapareCodeBaseInfo 准备爬虫基本信息
+func PrapareCodeBaseInfo() {
+	defer qu.Catch()
+	sess := util.MgoE.GetMgoConn()
+	defer util.MgoE.DestoryMongoConn(sess)
+	lock := &sync.Mutex{}
+	wg := &sync.WaitGroup{}
+	ch := make(chan bool, 5)
+	query := map[string]interface{}{
+		"$or": []interface{}{
+			//lua、python上线爬虫
+			map[string]interface{}{
+				"state": map[string]interface{}{
+					"$in": []int{5, 11}, //上架、上线爬虫
+				},
+			},
+			//lua正在被维护的爬虫
+			map[string]interface{}{
+				"platform": "golua平台",
+				"state": map[string]interface{}{
+					"$in": []int{0, 1, 2}, //待完成、待审核、未通过
+				},
+				"event": map[string]interface{}{
+					"$ne": 7000,
+				},
+			},
+			//python正在被维护的爬虫
+			map[string]interface{}{
+				"platform": "python",
+				"state": map[string]interface{}{
+					"$in": []int{1, 2, 6}, //待审核、未通过
+				},
+			},
+		},
+	}
+	fieles := map[string]interface{}{
+		"event":             1,
+		"param_common":      1,
+		"platform":          1,
+		"modifyuser":        1,
+		"modifyuserid":      1,
+		"state":             1,
+		"l_uploadtime":      1,
+		"listisfilter":      1,
+		"frequencyerrtimes": 1,
+	}
+	count := util.MgoE.Count("luaconfig", query)
+	logger.Debug("共加载线上爬虫个数:", count)
+	it := sess.DB(util.MgoE.DbName).C("luaconfig").Find(&query).Select(&fieles).Iter()
+	n := 0
+	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
+		wg.Add(1)
+		ch <- true
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			info := &Spider{
+				Error: map[string]*ErrorInfo{},
+			}
+			if param_common, ok := tmp["param_common"].([]interface{}); ok && len(param_common) >= 6 {
+				info.Code = qu.ObjToString(param_common[0])
+				info.Site = qu.ObjToString(param_common[1])
+				info.Channel = qu.ObjToString(param_common[2])
+				info.MaxPage = qu.IntAll(param_common[5])
+			} else {
+				logger.Debug("加载爬虫出错:", tmp["_id"])
+			}
+			info.ModifyUser = qu.ObjToString(tmp["modifyuser"])
+			info.ModifyId = qu.ObjToString(tmp["modifyuserid"])
+			info.AuditTime = qu.Int64All(tmp["l_uploadtime"])
+			info.Platform = qu.ObjToString(tmp["platform"])
+			info.Event = qu.IntAll(tmp["event"])
+			info.State = qu.IntAll(tmp["state"])
+			info.ListIsFilter = tmp["listisfilter"].(bool)
+			info.FrequencyErrTimes = qu.IntAll(tmp["frequencyerrtimes"])
+			info.Model = util.CodeEventModel[info.Event]
+			info.Working = util.CodeEventWorking[info.Event]
+			info.Comeintime = time.Now().Unix()
+			lock.Lock()
+			CodeInfoMap[info.Code] = info
+			lock.Unlock()
+		}(tmp)
+		if n%1000 == 0 {
+			logger.Debug(n)
+		}
+		tmp = map[string]interface{}{}
+	}
+	wg.Wait()
+	logger.Debug("爬虫基本信息准备完成...", len(CodeInfoMap))
+}
+
+// GetSpiderListDownloadNum 统计爬虫列表页下载量和下载失败量
+func GetSpiderListDownloadNum() {
+	defer qu.Catch()
+	sess := util.MgoS.GetMgoConn()
+	defer util.MgoS.DestoryMongoConn(sess)
+	match := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$gte": StartTime,
+			"$lt":  EndTime,
+		},
+	}
+	group1 := map[string]interface{}{
+		"_id": map[string]interface{}{
+			"spidercode": "$spidercode",
+			"state":      "$state",
+		},
+		"datacount": map[string]interface{}{
+			"$sum": 1,
+		},
+	}
+	group2 := map[string]interface{}{
+		"_id": "$_id.spidercode",
+		"stateinfo": map[string]interface{}{
+			"$push": map[string]interface{}{
+				"state": "$_id.state",
+				"count": "$datacount",
+			},
+		},
+		"count": map[string]interface{}{
+			"$sum": "$datacount",
+		},
+	}
+	project := map[string]interface{}{
+		"statearr": "$stateinfo",
+		"count":    1,
+	}
+	p := []map[string]interface{}{
+		map[string]interface{}{"$match": match},
+		map[string]interface{}{"$group": group1},
+		map[string]interface{}{"$group": group2},
+		map[string]interface{}{"$project": project},
+	}
+	lock := &sync.Mutex{}
+	wg := &sync.WaitGroup{}
+	ch := make(chan bool, 5)
+	//1、统计spider_highlistdata
+	it1 := sess.DB(util.MgoS.DbName).C("spider_highlistdata").Pipe(p).Iter()
+	n1 := 0
+	for tmp := make(map[string]interface{}); it1.Next(&tmp); n1++ {
+		wg.Add(1)
+		ch <- true
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			code := qu.ObjToString(tmp["_id"])
+			count := qu.IntAll(tmp["count"])                         //下载总量
+			successCount := 0                                        //下载成功总量
+			failedCount := 0                                         //下载失败量
+			noCount := 0                                             //未下载量
+			if stateArr, ok := tmp["statearr"].([]interface{}); ok { //某个爬虫的下载量信息
+				for _, stateInfo := range stateArr {
+					infoMap := stateInfo.(map[string]interface{})
+					state := qu.IntAll(infoMap["state"])
+					if state == 1 { //state:1,下载成功量
+						successCount = qu.IntAll(infoMap["count"])
+					} else if state == -1 { //state:-1,下载失败量
+						failedCount = qu.IntAll(infoMap["count"])
+					} else if state == 0 { //state:0,未下载量
+						noCount = qu.IntAll(infoMap["count"])
+					}
+				}
+			}
+			errArr := []*ErrRemark{}
+			if failedCount > 0 { //有采集失败的数据,查询失败链接
+				query := map[string]interface{}{
+					"comeintime": map[string]interface{}{
+						"$gte": StartTime,
+						"$lt":  EndTime,
+					},
+					"spidercode": code,
+					"state":      -1,
+				}
+				logger.Debug("采集失败爬虫:", code)
+				list, _ := util.MgoS.Find("spider_highlistdata", query, nil, map[string]interface{}{"href": 1}, false, 0, 3)
+				for _, l := range *list {
+					errArr = append(errArr, &ErrRemark{
+						Href:   qu.ObjToString(l["href"]),
+						Remark: "Download Failed",
+					})
+				}
+			}
+			lock.Lock()
+			if spider := CodeInfoMap[code]; spider != nil {
+				spider.DownloadAllNum = count
+				spider.DownloadSuccessNum = successCount
+				spider.DownloadFailedNum = failedCount
+				spider.NoDownloadNum = noCount
+				if len(errArr) > 0 {
+					spider.Error["download"] = &ErrorInfo{
+						Num: failedCount,
+						Err: errArr,
+					}
+				}
+			}
+			lock.Unlock()
+		}(tmp)
+		if n1%100 == 0 {
+			logger.Debug(n1)
+		}
+		tmp = map[string]interface{}{}
+	}
+	//2、统计spider_listdata
+	it2 := sess.DB(util.MgoS.DbName).C("spider_listdata").Pipe(p).Iter()
+	n2 := 0
+	for tmp := make(map[string]interface{}); it2.Next(&tmp); n2++ {
+		wg.Add(1)
+		ch <- true
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			code := qu.ObjToString(tmp["_id"])
+			count := qu.IntAll(tmp["count"]) //下载总量(不准确,含重复数据)
+			successCount := 0
+			failedCount := 0 //下载失败量(不准确,含重复数据)
+			noCount := 0     //未下载量
+			if stateArr, ok := tmp["statearr"].([]interface{}); ok {
+				for _, stateInfo := range stateArr {
+					infoMap := stateInfo.(map[string]interface{})
+					state := qu.IntAll(infoMap["state"])
+					if state == 1 { //state:1,下载成功量
+						successCount = qu.IntAll(infoMap["count"])
+					} else if state == -1 { //state:-1,下载失败量
+						failedCount = qu.IntAll(infoMap["count"])
+					} else if state == 0 { //state:0,下载失败量
+						noCount = qu.IntAll(infoMap["count"])
+					}
+				}
+			}
+			//errArr := []map[string]interface{}{}
+			//if failedCount > 0 { //有采集失败的数据,查询失败链接
+			//	match2["spidercode"] = code
+			//	match2["state"] = -1
+			//	logger.Debug("采集失败数据query:", match2)
+			//	list, _ := util.MgoS.Find("spider_listdata", match2, nil, map[string]interface{}{"href": 1}, false, 0, 3)
+			//	for _, l := range *list {
+			//		errArr = append(errArr, map[string]interface{}{
+			//			"href":   l["href"],
+			//			"remark": "Download Failed",
+			//		})
+			//	}
+			//}
+			lock.Lock()
+			if spider := CodeInfoMap[code]; spider != nil {
+				spider.DownloadAllNum = count
+				spider.DownloadSuccessNum = successCount
+				spider.DownloadFailedNum = failedCount
+				spider.NoDownloadNum = noCount
+				//if len(errArr) > 0 {
+				//	spider.Error["download"] = &ErrorInfo{
+				//		Num: failedCount,
+				//		Err: errArr,
+				//	}
+				//}
+			} else {
+				logger.Debug("-------------", code)
+			}
+			lock.Unlock()
+		}(tmp)
+		if n2%100 == 0 {
+			logger.Debug(n2)
+		}
+		tmp = map[string]interface{}{}
+	}
+	wg.Wait()
+	logger.Debug("统计采集量完成...")
+}
+
+// GetSpiderDownloadRateDataNew 汇总列表页采集频率情况
+func GetSpiderDownloadRateDataNew() {
+	defer qu.Catch()
+	sess := util.MgoS.GetMgoConn()
+	defer util.MgoS.DestoryMongoConn(sess)
+	ch := make(chan bool, 5)
+	wg := &sync.WaitGroup{}
+	lock := &sync.Mutex{}
+	date := qu.FormatDateByInt64(&StartTime, qu.Date_Short_Layout)
+	query := map[string]interface{}{
+		"date": date,
+		"event": map[string]interface{}{
+			"$ne": 7000,
+		},
+	}
+	fields := map[string]interface{}{
+		"spidercode": 1,
+		"alltimes":   1,
+		"zero":       1,
+		"oh_percent": 1,
+	}
+	logger.Debug("query:", query)
+	it := sess.DB(util.MgoS.DbName).C("spider_downloadrate").Find(&query).Select(&fields).Iter()
+	n := 0
+	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
+		ch <- true
+		wg.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			code := qu.ObjToString(tmp["spidercode"])
+			alltimes := qu.IntAll(tmp["alltimes"])
+			zero := qu.IntAll(tmp["zero"])
+			oh_percent := qu.IntAll(tmp["oh_percent"])
+			lock.Lock()
+			if spider := CodeInfoMap[code]; spider != nil {
+				spider.ListDownloadAllTimes = alltimes
+				spider.ListNoDataTimes = zero
+				if oh_percent > 0 && util.CodeEventModel[spider.Event] != 0 { //含有100%采集,及为采集频率异常(由于7410、7500、7700为老模式的队列模式,不建采集频率异常任务)
+					spider.FrequencyErrTimes++
+					spider.ListOhPercentTimes = oh_percent
+				}
+			} else {
+				logger.Debug("-------------", code)
+			}
+			lock.Unlock()
+		}(tmp)
+		if n%1000 == 0 {
+			logger.Debug("current:", n)
+		}
+		tmp = map[string]interface{}{}
+	}
+	wg.Wait()
+	logger.Debug("列表页采集统计完成...")
+}
+
+//汇总lua错误信息数据
+func GetSpiderWarnErrData() {
+	defer qu.Catch()
+	logger.Debug("错误信息数据统计...")
+	sess := util.MgoS.GetMgoConn()
+	defer util.MgoS.DestoryMongoConn(sess)
+	match := map[string]interface{}{
+		"level": 2,
+		"comeintime": map[string]interface{}{
+			"$gte": StartTime,
+			"$lt":  EndTime,
+		},
+	}
+	group1 := map[string]interface{}{
+		"_id": map[string]interface{}{
+			"code": "$code",
+			"info": "$info",
+		},
+		"datacount": map[string]interface{}{
+			"$sum": 1,
+		},
+	}
+	group2 := map[string]interface{}{
+		"_id": "$_id.code",
+		"infotext": map[string]interface{}{
+			"$push": map[string]interface{}{
+				"info":  "$_id.info",
+				"count": "$datacount",
+			},
+		},
+		"count": map[string]interface{}{
+			"$sum": "$datacount",
+		},
+	}
+	project := map[string]interface{}{
+		"infoarr": "$infotext",
+		"count":   1,
+	}
+	p := []map[string]interface{}{
+		map[string]interface{}{"$match": match},
+		map[string]interface{}{"$group": group1},
+		map[string]interface{}{"$group": group2},
+		map[string]interface{}{"$project": project},
+	}
+	logger.Debug("spider_warn:", match)
+	//1、统计spider_warn
+	it1 := sess.DB(util.MgoS.DbName).C("spider_warn").Pipe(p).Iter()
+	n1 := 0
+	ch := make(chan bool, 5)
+	wg := &sync.WaitGroup{}
+	lock := &sync.Mutex{}
+	for tmp := make(map[string]interface{}); it1.Next(&tmp); n1++ {
+		wg.Add(1)
+		ch <- true
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			code := qu.ObjToString(tmp["_id"])
+			//spider.Error = map[string]*ErrorInfo{} //初始化
+			if infoArr, ok := tmp["infoarr"].([]interface{}); ok {
+				for _, info := range infoArr {
+					stype := ""
+					query := map[string]interface{}{
+						"level": 2,
+						"comeintime": map[string]interface{}{
+							"$gte": StartTime,
+							"$lt":  EndTime,
+						},
+					}
+					infoMap := info.(map[string]interface{})
+					infoText := qu.ObjToString(infoMap["info"]) //错误信息
+					errCount := qu.IntAll(infoMap["count"])     //错误数量
+					if infoText == "Publishtime Is Too Late" {  //发布时间超前
+						query["info"] = infoText
+						stype = "publishtime"
+					} else if infoText == "Publishtime Is Less Than Zero" { //发布时间小于0
+						query["info"] = infoText
+						stype = "publishtime"
+					} else if infoText == "Publishtime Is Too Early" { //发布时间过小
+						query["info"] = infoText
+						stype = "publishtime"
+					} else if infoText == "Field Value Not Contains Chinese" { //title、detail不含中文
+						query["info"] = infoText
+						stype = "text"
+					} else if infoText == "Field Value Contains Random Code" { //title、detail含乱码
+						query["info"] = infoText
+						stype = "text"
+					} else {
+						continue
+					}
+					query["code"] = code
+					//logger.Debug(query)
+					//errArr := []*ErrRemark{}
+					//list, _ := util.MgoS.Find("spider_warn", query, nil, map[string]interface{}{"href": 1}, false, 0, 3)
+					//for _, l := range *list {
+					//	errArr = append(errArr, &ErrRemark{
+					//		Href:   qu.ObjToString(l["href"]),
+					//		Remark: infoText,
+					//	})
+					//}
+					one, _ := util.MgoS.FindOne("spider_warn", query) //查询该错误信息类型的一条href
+					oneErrInfo := &ErrRemark{
+						Href:   qu.ObjToString((*one)["href"]),
+						Remark: infoText,
+					}
+					lock.Lock()
+					if spider := CodeInfoMap[code]; spider != nil {
+						if errMap := spider.Error[stype]; errMap != nil {
+							errMap.Num += errCount
+							errMap.Err = append(errMap.Err, oneErrInfo)
+						} else {
+							spider.Error[stype] = &ErrorInfo{
+								Num: errCount,
+								Err: []*ErrRemark{
+									oneErrInfo,
+								},
+							}
+						}
+					}
+					lock.Unlock()
+				}
+			}
+
+		}(tmp)
+		if n1%10 == 0 {
+			logger.Debug(n1)
+		}
+		tmp = map[string]interface{}{}
+	}
+	//2、统计regatherdata
+	match = map[string]interface{}{
+		"state": map[string]interface{}{
+			"$lte": 1,
+		},
+		"from": "lua",
+		"comeintime": map[string]interface{}{
+			"$gte": StartTime,
+			"$lt":  EndTime,
+		},
+	}
+	group1 = map[string]interface{}{
+		"_id": "$spidercode",
+		"count": map[string]interface{}{
+			"$sum": 1,
+		},
+	}
+	p = []map[string]interface{}{
+		map[string]interface{}{"$match": match},
+		map[string]interface{}{"$group": group1},
+	}
+	logger.Debug("regather query:", match)
+	it2 := sess.DB(util.MgoS.DbName).C("regatherdata").Pipe(p).Iter()
+	n2 := 0
+	for tmp := make(map[string]interface{}); it2.Next(&tmp); n2++ {
+		wg.Add(1)
+		ch <- true
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			code := qu.ObjToString(tmp["_id"]) //爬虫代码
+			count := qu.IntAll(tmp["count"])   //异常数据量
+			query := map[string]interface{}{
+				"state": map[string]interface{}{
+					"$lte": 1,
+				},
+				"from": "lua",
+				"comeintime": map[string]interface{}{
+					"$gte": StartTime,
+					"$lt":  EndTime,
+				},
+				"spidercode": code,
+			}
+			//logger.Debug("query:", query)
+
+			errArr := []*ErrRemark{}
+			list, _ := util.MgoS.Find("regatherdata", query, nil, map[string]interface{}{"href": 1, "error": 1}, false, 0, 3)
+			for _, l := range *list {
+				errArr = append(errArr, &ErrRemark{
+					Href:   qu.ObjToString(l["href"]),
+					Remark: qu.ObjToString(l["error"]),
+				})
+			}
+			//one, _ := util.MgoS.FindOne("regatherdata", query) //查询该错误信息类型的一条href
+			//oneErrInfo := &ErrRemark{
+			//	Href:   qu.ObjToString((*one)["href"]),
+			//	Remark: qu.ObjToString((*one)["error"]),
+			//}
+			if spider := CodeInfoMap[code]; spider != nil {
+				spider.Error["regather"] = &ErrorInfo{
+					Num: count,
+					Err: errArr,
+				}
+				// if spider_err := spider.Error; spider_err != nil {
+				// 	spider_err["regather"] = &ErrorInfo{
+				// 		Num: count,
+				// 		Err: []map[string]interface{}{
+				// 			oneErrInfo,
+				// 		},
+				// 	}
+				// } else {
+				// 	spider.Error = map[string]*ErrorInfo{
+				// 		"regather": &ErrorInfo{
+				// 			Num: count,
+				// 			Err: []map[string]interface{}{
+				// 				oneErrInfo,
+				// 			},
+				// 		},
+				// 	}
+				// }
+			}
+		}(tmp)
+		if n2%10 == 0 {
+			logger.Debug(n2)
+		}
+		tmp = map[string]interface{}{}
+	}
+	wg.Wait()
+	logger.Debug("错误信息数据统计完成...")
+}
+
+//汇总python错误信息数据
+func GetPythonWarnErrData() {
+	GetPythonDownloadNum() //统计总下载量
+	GetPythonErrData()     //统计异常信息
+}
+
+//统计总下载量
+func GetPythonDownloadNum() {
+	defer qu.Catch()
+	sess := util.MgoPy.GetMgoConn()
+	defer util.MgoPy.DestoryMongoConn(sess)
+	match := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$gte": StartTime,
+			"$lt":  EndTime,
+		},
+	}
+	group1 := map[string]interface{}{
+		"_id": map[string]interface{}{
+			"spidercode": "$spidercode",
+			"sendflag":   "$sendflag",
+		},
+		"datacount": map[string]interface{}{
+			"$sum": 1,
+		},
+	}
+	group2 := map[string]interface{}{
+		"_id": "$_id.spidercode",
+		"sendflagarr": map[string]interface{}{
+			"$push": map[string]interface{}{
+				"sendflag": "$_id.sendflag",
+				"count":    "$datacount",
+			},
+		},
+		"count": map[string]interface{}{
+			"$sum": "$datacount",
+		},
+	}
+	project := map[string]interface{}{
+		"infoarr": "$sendflagarr",
+		"count":   1,
+	}
+	p := []map[string]interface{}{
+		map[string]interface{}{"$match": match},
+		map[string]interface{}{"$group": group1},
+		map[string]interface{}{"$group": group2},
+		map[string]interface{}{"$project": project},
+	}
+	ch := make(chan bool, 5)
+	wg := &sync.WaitGroup{}
+	lock := &sync.Mutex{}
+	it1 := sess.DB(util.MgoPy.DbName).C("data_bak").Pipe(p).Iter()
+	n := 0
+	for tmp := make(map[string]interface{}); it1.Next(&tmp); n++ {
+		wg.Add(1)
+		ch <- true
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			code := qu.ObjToString(tmp["_id"])
+			count := qu.IntAll(tmp["count"]) //下载总量
+			successCount := 0                //下载成功总量
+			if infoArr, ok := tmp["infoarr"].([]interface{}); ok {
+				for _, info := range infoArr {
+					infoMap := info.(map[string]interface{})
+					if sendflag := qu.ObjToString(infoMap["sendflag"]); sendflag == "true" {
+						successCount = qu.IntAll(infoMap["count"])
+					}
+				}
+			}
+			lock.Lock()
+			if spider := CodeInfoMap[code]; spider != nil {
+				spider.DownloadAllNum = count
+				spider.DownloadSuccessNum = successCount //保存服务发送成功数
+			}
+			lock.Unlock()
+		}(tmp)
+		if n%100 == 0 {
+			logger.Debug(n)
+		}
+		tmp = map[string]interface{}{}
+	}
+	wg.Wait()
+	logger.Debug("python数据下载量统计完成...")
+}
+
+//统计异常信息
+func GetPythonErrData() {
+	defer qu.Catch()
+	sess := util.MgoPy.GetMgoConn()
+	defer util.MgoPy.DestoryMongoConn(sess)
+	query := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$gte": StartTime,
+			"$lt":  EndTime,
+		},
+	}
+	fieles := map[string]interface{}{
+		"spidercode":  1,
+		"parser_name": 1,
+		"parse_url":   1,
+		"failed":      1,
+		"code":        1,
+	}
+	it := sess.DB(util.MgoPy.DbName).C("mgp_list").Find(&query).Select(&fieles).Iter()
+	n := 0
+	lock := &sync.Mutex{}
+	wg := &sync.WaitGroup{}
+	ch := make(chan bool, 5)
+	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
+		wg.Add(1)
+		ch <- true
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			state := qu.IntAll(tmp["code"])
+			if state == -1 { //状态码为-1表示详情页未执行下载操作,不统计
+				return
+			}
+			spidercode := qu.ObjToString(tmp["spidercode"])
+			remark := qu.ObjToString(tmp["parser_name"])
+			href := qu.ObjToString(tmp["parse_url"])
+			failed := qu.IntAll(tmp["failed"])
+			errType := StateFeedBackErr[state]
+			oneErrInfo := &ErrRemark{
+				Href:   href,
+				Remark: remark,
+			}
+			lock.Lock()
+			if spider := CodeInfoMap[spidercode]; spider != nil {
+				if failed == 0 { //未采集
+					spider.NoDownloadNum++
+				} else { //下载失败
+					spider.DownloadFailedNum++
+					if spider_err := spider.Error; spider_err != nil {
+						if errInfo := spider_err[errType]; errInfo != nil {
+							errInfo.Num++
+							if len(errInfo.Err) < 3 { //最多存放三个错误数据连接
+								errInfo.Err = append(errInfo.Err, oneErrInfo)
+							}
+						} else {
+							spider.Error[errType] = &ErrorInfo{
+								Num: 1,
+								Err: []*ErrRemark{
+									oneErrInfo,
+								},
+							}
+						}
+					} else {
+						spider.Error = map[string]*ErrorInfo{
+							errType: &ErrorInfo{
+								Num: 1,
+								Err: []*ErrRemark{
+									oneErrInfo,
+								},
+							},
+						}
+					}
+				}
+			}
+			lock.Unlock()
+		}(tmp)
+		if n%100 == 0 {
+			logger.Debug(n)
+		}
+		tmp = map[string]interface{}{}
+	}
+	wg.Wait()
+	logger.Debug("python下载异常数据统计完成...")
+}
+
// CreateTaskProcess turns the per-spider monitoring results accumulated in
// CodeInfoMap into maintenance tasks: for every spider it derives an error
// type and state from the recorded failures, persists the daily statistics
// document to the luacodeinfo collection, and merges the resulting task into
// the task collection via CreateTask. CodeInfoMap is cleared when the run
// finishes.
func CreateTaskProcess() {
	defer qu.Catch()
	logger.Debug("开始生成爬虫任务...")
	//arr := []map[string]interface{}{}
	upsertBulk := [][]map[string]interface{}{} // pending task upserts (selector + update pairs)
	arr := []map[string]interface{}{}          // today's spider statistics documents
	wg := &sync.WaitGroup{}
	lock := &sync.Mutex{}     // guards arr and upsertBulk
	ch := make(chan bool, 10) // bounds concurrency to 10 workers
	logger.Debug("CodeInfoMap:", len(CodeInfoMap))
	for code, spider := range CodeInfoMap {
		wg.Add(1)
		ch <- true
		go func(code string, spider *Spider) {
			defer func() {
				<-ch
				wg.Done()
			}()
			// Assemble the new task's information.
			task := &Task{
				DescribeMap: map[int]string{},
			}
			//task.Platform = spider.Platform
			//task.Site = spider.Site
			//task.Code = spider.Code
			//task.Channel = spider.Channel
			//task.ModifyUser = spider.ModifyUser
			//task.ModifyId = spider.ModifyId
			//task.FrequencyErrTimes = spider.FrequencyErrTimes
			// Error kinds shared by lua and python: publishtime, text.
			if len(spider.Error) > 0 {
				// 1. download: download error, errtype 5;
				// 2. regather: runtime error, errtype 4;
				// 3. publishtime: publish-time error, errtype 3;
				// 4. text: content error, errtype 2.
				for stype, info := range LuaErrTypeInfoMap {
					if err := spider.Error[stype]; err != nil {
						// Keep the highest-ranked error type seen so far.
						if task.ErrType < info.ErrType {
							task.ErrType = info.ErrType
						}
						// If any of download/regather/publishtime/text exceeds
						// 10 occurrences the task becomes "pending".
						if err.Num > 10 { // more than 10 errors -> pending
							task.State = 1 // pending
						}
						// Error description: total count plus up to three sample links.
						descript := info.Remark + ":共" + fmt.Sprint(err.Num) + "条\n"
						for _, errRemark := range err.Err {
							descript += errRemark.Remark + ":" + errRemark.Href + "\n"
						}
						task.DescribeMap[info.ErrType] = descript
					}
				}
			}
			if spider.Platform == "golua平台" { // lua-specific errors (frequency errors are special, so handled last)
				if spider.ListNoDataTimes > 0 { // list page yielded no data
					// 5. list-page error, errtype 7.
					if !spider.ListIsFilter { // list page contains no filter code
						task.State = 1 // pending
						task.ErrType = TASK_LISTERR
					} else if len(task.DescribeMap) == 0 { // only a list-page error, and filter code exists
						task.State = 0 // to be confirmed
						task.ErrType = TASK_LISTERR
					}
					task.DescribeMap[TASK_LISTERR] = "列表页异常:\n 列表页共采集" + fmt.Sprint(spider.ListDownloadAllTimes) + "轮,其中有" + fmt.Sprint(spider.ListNoDataTimes) + "轮无数据\n"
				}
				// 6. crawl-frequency error, errtype 8.
				if spider.ListOhPercentTimes > 0 { // frequency error detected
					UpdateLuaInfo(spider) // bump frequencyerrtimes, max page +1, re-publish the spider
					// The frequency error only wins when FrequencyErrTimes > 3;
					// otherwise other error types take precedence (a "to confirm"
					// frequency error is handled automatically, rarely by hand).
					if spider.FrequencyErrTimes > 3 { // >3 frequency errors -> pending, otherwise to be confirmed
						task.State = 1 // pending
						task.ErrType = TASK_RATEERR
					} else if len(task.DescribeMap) == 0 { // only a frequency error and FrequencyErrTimes <= 3
						task.State = 0 // to be confirmed
						task.ErrType = TASK_RATEERR
					}
					task.DescribeMap[TASK_RATEERR] = "采集频率异常:\n 列表页共采集" + fmt.Sprint(spider.ListDownloadAllTimes) + "轮,其中有" + fmt.Sprint(spider.ListOhPercentTimes) + "轮数据全采\n"
				}
			} else if spider.Platform == "python" { // python-specific errors
				for stype, info := range PythonErrTypeInfoMap {
					if err := spider.Error[stype]; err != nil {
						// Keep the highest-ranked error type seen so far.
						if task.ErrType < info.ErrType {
							task.ErrType = info.ErrType
						}
						if info.ErrType > 3 { // python 404/download/runtime errors are always pending
							task.State = 1
						}
						// Error description: total count plus sample links.
						descript := info.Remark + ":共" + fmt.Sprint(err.Num) + "条\n"
						for _, errRemark := range err.Err {
							descript += errRemark.Remark + ":" + errRemark.Href + "\n"
						}
						// lua and python can both yield ErrType 3/4; concatenate descriptions.
						task.DescribeMap[info.ErrType] = descript + task.DescribeMap[info.ErrType]
					}
				}
			}
			// Persist the spider's statistics (struct -> JSON -> map document).
			byteText, err := json.Marshal(spider)
			if err != nil {
				logger.Debug("Json Marshal Error", code)
				return
			}
			tmp := map[string]interface{}{}
			if json.Unmarshal(byteText, &tmp) == nil {
				lock.Lock()
				arr = append(arr, tmp)
				lock.Unlock()
			} else {
				logger.Debug("Json UnMarshal Error", code)
				return
			}
			// Create a new task from the spider info.
			CreateTask(task, spider, &upsertBulk, lock) // reconcile with any existing open task
			//
			lock.Lock()
			if len(arr) > 500 {
				util.MgoE.SaveBulk("luacodeinfo", arr...)
				arr = []map[string]interface{}{}
			}
			if len(upsertBulk) > 500 {
				util.MgoE.UpSertBulk("task", upsertBulk...)
				upsertBulk = [][]map[string]interface{}{}
			}
			lock.Unlock()
		}(code, spider)
	}
	wg.Wait()
	// Flush whatever is left in both batches.
	lock.Lock()
	if len(arr) > 0 {
		util.MgoE.SaveBulk("luacodeinfo", arr...)
		arr = []map[string]interface{}{}
	}
	if len(upsertBulk) > 0 {
		util.MgoE.UpSertBulk("task", upsertBulk...)
		upsertBulk = [][]map[string]interface{}{}
	}
	lock.Unlock()
	logger.Debug("生成任务完成...")
	CodeInfoMap = map[string]*Spider{}
}
+
// CreateTask merges a freshly derived task t with any existing open task for
// the same spider code. If exactly one open task exists, its description,
// urgency, times counter, and (when the old state allows) state/type are
// updated in place; if none exists, a brand-new task document is queued.
// Each update is appended to upsertBulk as a (selector, update) pair under
// lock; the caller is responsible for flushing the batch to MongoDB.
func CreateTask(t *Task, sp *Spider, upsertBulk *[][]map[string]interface{}, lock *sync.Mutex) {
	defer qu.Catch()
	if t.ErrType == 0 { // not an error task: nothing to create
		return
	}
	diff := time.Now().Unix() - sp.AuditTime
	if sp.State == 5 && diff <= 86400 { // on-shelf and audited within 24h: recently maintained, skip
		logger.Debug("该爬虫近期维护无需新建任务:", sp.Code)
		return
	}
	descript_new := "" // description text of the new task
	for _, text := range t.DescribeMap {
		descript_new += text
	}
	query := map[string]interface{}{
		"s_code": sp.Code,
		"i_state": map[string]interface{}{
			"$in": []int{0, 1, 2, 3, 5}, // states of tasks still being worked on
		},
	}
	fields := map[string]interface{}{
		"i_state":    1,
		"s_type":     1,
		"s_descript": 1,
		"i_times":    1,
		"s_urgency":  1,
	}
	list, _ := util.MgoE.Find("task", query, nil, fields, false, -1, -1)
	update := []map[string]interface{}{}
	if list != nil && len(*list) > 0 { // an open task already exists
		if len(*list) > 1 {
			// More than one open task per code is unexpected; record the
			// anomaly and bail out.
			logger.Error("Code:", sp.Code, "任务异常")
			util.MgoE.Save("luacreatetaskerr", map[string]interface{}{
				"code":       sp.Code,
				"comeintime": time.Now().Unix(),
				"tasknum":    len(*list),
			})
			return
		}
		task := (*list)[0]                                 // the single open task
		state_old := qu.IntAll(task["i_state"])            // previous task state
		times_old := qu.IntAll(task["i_times"])            // previous "pending" counter
		type_old := qu.ObjToString(task["s_type"])         // previous error type
		urgency_old := qu.ObjToString(task["s_urgency"])   // previous urgency
		descript_old := qu.ObjToString(task["s_descript"]) // previous description
		result := map[string]interface{}{
			"i_frequencyerrtimes": sp.FrequencyErrTimes,
			"i_num":               sp.DownloadSuccessNum, // download count (currently the successful downloads)
			"l_updatetime":        time.Now().Unix(),
			"i_times":             times_old + 1,
			"s_descript":          descript_old + time.Now().Format(qu.Date_Short_Layout) + "追加描述:------------------------------\n" + descript_new,
		}
		if state_old == 0 || state_old == 1 { // to-confirm/pending: full merge; other states only get description/times/count updates
			// Task state i_state and error type s_type.
			if state_old == 1 || t.State == 1 { // either side pending -> merged task is pending
				result["i_state"] = 1
				if t.State == 1 && state_old == 1 { // both pending: keep the higher-ranked error type
					if t.ErrType > qu.IntAll(type_old) {
						result["s_type"] = fmt.Sprint(t.ErrType)
					}
				} else if t.State == 1 { // new pending, old to-confirm: take the new type
					result["s_type"] = fmt.Sprint(t.ErrType)
				} /*else if state_old == 1 {
				}*/
			} else if state_old == 0 && t.State == 0 && t.ErrType > qu.IntAll(type_old) { // both to-confirm: keep the higher type
				result["s_type"] = fmt.Sprint(t.ErrType)
			}
			if times_old >= 3 { // the fourth task for the same spider forces "pending"
				result["i_state"] = 1
			}
			// Urgency grows by one per recurrence, capped at 4.
			urgency := qu.IntAll(urgency_old)
			if urgency < 4 {
				result["s_urgency"] = fmt.Sprint(urgency + 1)
			}
			// Latest completion deadline.
			if qu.IntAll(result["i_state"]) == 1 && state_old == 0 { // escalated from to-confirm to pending: refresh the deadline
				result["l_complete"] = util.CompleteTime(fmt.Sprint(urgency + 1))
			}
		}
		update = append(update, map[string]interface{}{"_id": task["_id"]})
		update = append(update, map[string]interface{}{"$set": result})
		lock.Lock()
		*upsertBulk = append(*upsertBulk, update)
		lock.Unlock()
	} else { // no open task yet: queue a brand-new one
		//times := 0
		//if t.State == 1 { // pending: times = 1
		//	times = 1
		//}
		saveMap := map[string]interface{}{
			"s_modify":     sp.ModifyUser,
			"s_modifyid":   sp.ModifyId,
			"s_code":       sp.Code,
			"s_site":       sp.Site,
			"s_channel":    sp.Channel,
			"i_event":      sp.Event,
			"i_state":      t.State,
			"s_source":     "程序",
			"s_type":       fmt.Sprint(t.ErrType),
			"s_descript":   descript_new,
			"i_times":      1,
			"i_num":        sp.DownloadSuccessNum, // download count (currently the successful downloads)
			"l_comeintime": time.Now().Unix(),
			//"l_updatetime": time.Now().Unix(),
			"l_complete":          util.CompleteTime("1"),
			"s_urgency":           "1",
			"i_frequencyerrtimes": sp.FrequencyErrTimes,
		}
		update = append(update, query)
		update = append(update, saveMap)
		lock.Lock()
		*upsertBulk = append(*upsertBulk, update)
		lock.Unlock()
	}
}
+
+//更新爬虫最大页、爬虫上下架
+func UpdateLuaInfo(sp *Spider) {
+	defer qu.Catch()
+	//1、更新爬虫信息
+	set := map[string]interface{}{
+		"frequencyerrtimes": sp.FrequencyErrTimes, //更新次数
+	}
+	if sp.FrequencyErrTimes <= 3 {
+		set["param_common.5"] = sp.MaxPage + 1
+	}
+	logger.Debug("Code:", sp.Code, "	", sp.FrequencyErrTimes)
+	b := util.MgoE.Update("luaconfig", map[string]interface{}{"code": sp.Code}, map[string]interface{}{"$set": set}, false, false)
+	if b && sp.FrequencyErrTimes <= 3 { //FrequencyErrTimes>3时会建采集频率异常的待处理任务,不再上下架
+		//爬虫下架、上加
+		qu.Debug("爬虫上下架 code:", sp.Code)
+		ok, err := util.UpdateSpiderByCodeState(sp.Code, "6", sp.Event) //下架
+		if ok && err == nil {
+			logger.Debug(sp.Code, "下架成功")
+			time.Sleep(1 * time.Second)
+			ok, err = util.UpdateSpiderByCodeState(sp.Code, "5", sp.Event) //上架
+			if ok && err == nil {
+				logger.Debug(sp.Code, "上架成功")
+			}
+		}
+	}
+}
+
+//重置前一周内未下载成功的数据(一天3次未下成功的数据可以连续下一周)
+func ResetDataState() {
+	defer qu.Catch()
+	logger.Info("-----更新数据状态-----")
+	sess := util.MgoS.GetMgoConn()
+	defer util.MgoS.DestoryMongoConn(sess)
+	ch := make(chan bool, 3)
+	wg := &sync.WaitGroup{}
+	lock := &sync.Mutex{}
+	query := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$gte": util.GetTime(-util.DayNum),
+			"$lt":  util.GetTime(0),
+		},
+		"state": -1,
+	}
+	field := map[string]interface{}{
+		"_id": 1,
+	}
+	it := sess.DB("spider").C("spider_highlistdata").Find(&query).Select(&field).Iter()
+	count, _ := sess.DB("spider").C("spider_highlistdata").Find(&query).Count()
+	logger.Info("更新数据状态数量:", count)
+	n := 0
+	arr := [][]map[string]interface{}{}
+	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
+		ch <- true
+		wg.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			update := []map[string]interface{}{}
+			update = append(update, map[string]interface{}{"_id": tmp["_id"]})
+			update = append(update, map[string]interface{}{"$set": map[string]interface{}{"times": 0, "state": 0}})
+			lock.Lock()
+			arr = append(arr, update)
+			if len(arr) > 500 {
+				tmps := arr
+				util.MgoS.UpdateBulk("spider_highlistdata", tmps...)
+				arr = [][]map[string]interface{}{}
+			}
+			lock.Unlock()
+		}(tmp)
+		tmp = map[string]interface{}{}
+	}
+	wg.Wait()
+	lock.Lock()
+	if len(arr) > 0 {
+		util.MgoS.UpdateBulk("spider_highlistdata", arr...)
+		arr = [][]map[string]interface{}{}
+	}
+	lock.Unlock()
+	logger.Info("-----更新数据状态完毕-----")
+}
+
+//关闭任务
+func CloseTask() {
+	qu.Catch()
+	logger.Debug("---清理未更新任务---")
+	decreaseDay, day := 0, 0
+	var cleanDay string
+	for {
+		decreaseDay--
+		weekDay := time.Now().AddDate(0, 0, decreaseDay).Weekday().String()
+		if weekDay != "Saturday" && weekDay != "Sunday" {
+			day++
+		}
+		if day == util.CloseNum {
+			cleanDay = time.Now().AddDate(0, 0, decreaseDay).Format("2006-01-02")
+			break
+		}
+	}
+	the_time, _ := time.ParseInLocation(qu.Date_Short_Layout, cleanDay, time.Local)
+	unix_time := the_time.Unix() //凌晨时间戳
+	query := map[string]interface{}{
+		"i_state": 0,
+		"l_complete": map[string]interface{}{
+			"$lt": unix_time + 86400,
+		},
+		"s_type": "1",
+		// "s_type": map[string]interface{}{
+		// 	"$ne": "7",
+		// },
+	}
+	logger.Debug("query:", query)
+	set := map[string]interface{}{
+		"$set": map[string]interface{}{
+			"i_state": 6,
+		},
+	}
+	util.MgoE.Update("task", query, set, false, true)
+	logger.Debug("---清理未更新任务完毕---")
+}
+
+//保存爬虫每日监控信息
+func SaveCodeInfo() {
+	defer qu.Catch()
+	arr := []map[string]interface{}{}
+	wg := &sync.WaitGroup{}
+	lock := &sync.Mutex{}
+	ch := make(chan bool, 10)
+	logger.Debug("CodeInfoMap:", len(CodeInfoMap))
+	for code, spider := range CodeInfoMap {
+		wg.Add(1)
+		ch <- true
+		go func(code string, sp Spider) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			byteText, err := json.Marshal(sp)
+			if err != nil {
+				logger.Debug("Json Marshal Error", code)
+				return
+			}
+			tmp := map[string]interface{}{}
+			if json.Unmarshal(byteText, &tmp) == nil {
+				lock.Lock()
+				arr = append(arr, tmp)
+				lock.Unlock()
+			} else {
+				logger.Debug("Json UnMarshal Error", code)
+				return
+			}
+			lock.Lock()
+			if len(arr) > 500 {
+				util.MgoE.SaveBulk("luacodeinfo", arr...)
+				arr = []map[string]interface{}{}
+			}
+			lock.Unlock()
+		}(code, *spider)
+	}
+	wg.Wait()
+	if len(arr) > 0 {
+		util.MgoE.SaveBulk("luacodeinfo", arr...)
+		arr = []map[string]interface{}{}
+	}
+	logger.Debug("爬虫基本信息生成完成...")
+}
+
+func SaveUserCreateTaskNum() {
+	defer qu.Catch()
+	for user, sn := range UserTaskNum {
+		save := map[string]interface{}{}
+		save["user"] = user
+		save["comeintime"] = time.Now().Unix()
+		for s, n := range sn {
+			save[s] = n
+		}
+		util.MgoE.Save("luausertask", save)
+	}
+	UserTaskNum = map[string]map[string]int{}
+}

+ 23 - 69
src/main.go

@@ -2,92 +2,46 @@ package main
 
 import (
 	"fmt"
+	"github.com/donnie4w/go-logger/logger"
 	"io/ioutil"
-	mgo "mongodb"
+	"luatask"
 	"os"
 	qu "qfw/util"
-	"regexp"
 	"time"
+	"timetask"
+	"util"
 
 	"github.com/cron"
-
-	"github.com/donnie4w/go-logger/logger"
-)
-
-var timeReg = regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2}")
-var (
-	Config map[string]interface{}
-	//User   map[string]string
-	MgoE *mgo.MongodbSim //editor
-	MgoS *mgo.MongodbSim //spider
 )
 
 func init() {
-	qu.ReadConfig(&Config)
-	//qu.ReadConfig("./user.json", &User)
-	//mgo
-	spider := Config["spider"].(map[string]interface{})
-	MgoS = &mgo.MongodbSim{
-		MongodbAddr: qu.ObjToString(spider["addr"]),
-		DbName:      qu.ObjToString(spider["db"]),
-		Size:        qu.IntAll(spider["size"]),
-	}
-	MgoS.InitPool()
-	editor := Config["editor"].(map[string]interface{})
-	MgoE = &mgo.MongodbSim{
-		MongodbAddr: qu.ObjToString(editor["addr"]),
-		DbName:      qu.ObjToString(editor["db"]),
-		Size:        qu.IntAll(editor["size"]),
-	}
-	MgoE.InitPool()
+	qu.ReadConfig(&util.Config)
+	qu.ReadConfig("./worktime.json", &util.Workfig)
+	qu.ReadConfig("./user.json", &timetask.LuaUserMap)
+	util.InitWorkfig()
+	timetask.GetLuaUserInfo()
+	//
+	util.InitMgo()
+	util.InitOther() //
+	//msgclient
+	util.InitMsgClient(
+		qu.ObjToString(util.CommServers["addr"]),
+		qu.ObjToString(util.BidServers["addr"]),
+		qu.ObjToString(util.CommServers["name"]),
+		qu.ObjToString(util.BidServers["name"]),
+	)
 	//logs
 	logger.SetRollingDaily("./logs", "task.log")
-	//爬虫上下浮动率
-	DownloadCheck = make(map[string]*DC)
-	downloadcheck := Config["downloadcheck"].(map[string]interface{})
-	for _, tmp := range downloadcheck {
-		tmpMap := tmp.(map[string]interface{})
-		downratio := qu.Float64All(tmpMap["downratio"])
-		uptatio := qu.Float64All(tmpMap["upratio"])
-		codes := tmpMap["spidercode"].([]interface{})
-		if len(codes) > 0 {
-			for _, code := range codes {
-				c := qu.ObjToString(code)
-				DownloadCheck[c] = &DC{
-					DownRatio: 1.0 - downratio/100.0,
-					UpRatio:   1.0 + uptatio/100.0,
-				}
-			}
-		} else {
-			DownloadCheck["other"] = &DC{
-				DownRatio: 1.0 - downratio/100.0,
-				UpRatio:   1.0 + uptatio/100.0,
-			}
-		}
-	}
-	StartTaskCron = qu.ObjToString(Config["startaskcron"])
-	UpdateStateCron = qu.ObjToString(Config["updatestatecron"])
-	CodeSummaryCron = qu.ObjToString(Config["codesummarycron"])
-	CloseNum = qu.IntAll(Config["closenum"])
-	DayNum = qu.IntAll(Config["daynum"])
-	//
-	YearMinDownloadNum = qu.IntAll(Config["yearmindownload"])
-	IntervalMaxNum = qu.IntAll(Config["intervalmaxnum"])
 }
 
 func main() {
 	go clearLogs()
-	//低采集量爬虫新建任务
-	LuaYearMinCodeCreateTask()
 	c := cron.New()
 	c.Start()
-	c.AddFunc(StartTaskCron, StartTask) //开始任务
+	c.AddFunc(util.RandomDataPushCron, timetask.GetSpiderWarnData)
+	c.AddFunc(util.QyworkRemindCron, timetask.SendInfoToWxWork)
+	c.AddFunc(util.StartTaskCron, luatask.StartTask) //开始任务
 	//c.AddFunc(CodeSummaryCron, SummaryCode)    //上架爬虫信息汇总
-	//c.AddFunc(UpdateStateCron, ResetDataState) //更新数据状态
-	//统计爬虫历史下载量制定任务周期
-	// GetSpidercode()
-	// TagCode()
-	// CycleTime()
 	ch := make(chan bool, 1)
 	<-ch
 }
@@ -99,7 +53,7 @@ func clearLogs() {
 	if err == nil {
 		for _, f := range dirs {
 			fname := f.Name()
-			logTimeStr := timeReg.FindString(fname)
+			logTimeStr := util.TimeReg.FindString(fname)
 			if logTimeStr == "" {
 				continue
 			}

+ 0 - 1088
src/task.go

@@ -1,1088 +0,0 @@
-package main
-
-import (
-	"fmt"
-	mgo "mongodb"
-	qu "qfw/util"
-	"strconv"
-	"sync"
-	"time"
-
-	"github.com/donnie4w/go-logger/logger"
-)
-
-type Task struct {
-	Code        string                            //爬虫代码
-	Site        string                            //站点
-	Channel     string                            //栏目
-	ErrType     string                            //异常类型:6:运行异常;5:下载异常;4:发布时间异常;3:乱码;2:状态码异常;1:数据量异常
-	ErrInfo     map[string]map[string]interface{} //异常集合
-	Description string                            //描述
-	State       int                               //状态
-}
-
-var (
-	StartTime       int64                     //上一个工作日的起始时间
-	EndTime         int64                     //上一个工作日的结束时间
-	TaskMap         map[string]*Task          //任务集合
-	StartTaskCron   string                    //任务开始
-	UpdateStateCron string                    //每天关闭任务的时间
-	CodeSummaryCron string                    //每天统计爬虫信息
-	CloseNum        int                       //关闭几天的任务
-	DayNum          int                       //更新数据天数
-	UserTaskNum     map[string]map[string]int //记录每人每天新建任务量
-)
-
-func StartTask() {
-	GetDownloadNumber() //统计下载量
-	CreateTaskProcess() //创建任务
-	ResetDataState()    //更新数据状态
-	CloseTask()         //关闭任务
-}
-
-//创建任务
-func CreateTaskProcess() {
-	InitInfo()                  //初始化
-	GetSpiderDownloadRateData() //1、统计spider_downloadrate前一天列表页采集异常爬虫
-	GetStatusCodeErrorData()    //2、统计spider_sitecheck 站点异常爬虫(404)
-	GetDownloadFailedData()     //3、统计spider_highlistdata前一天下载失败的爬虫数据(统计完成后修改状态state:0)
-	GetRegatherFailedData()     //4、统计regatherdata前一天重采失败的爬虫数据
-	GetDTPErrData()             //5、统计spider_warn异常数据(发布时间异常、乱码)
-	GetDownloadNumErrData()     //6、统计download前一天下载量异常的爬虫数据(每天1点统计下载量,目前统计完成需要1个小时)
-	SaveResult()                //保存统计信息
-	CreateLuaTask()             //创建任务
-	SaveUserCreateTaskNum()     //保存每人创建的任务量
-}
-
-//初始化
-func InitInfo() {
-	defer qu.Catch()
-	TaskMap = map[string]*Task{}
-	UserTaskNum = map[string]map[string]int{}
-	InitTime() //初始化时间
-}
-
-//关闭任务
-func CloseTask() {
-	qu.Catch()
-	logger.Debug("---清理未更新任务---")
-	decreaseDay, day := 0, 0
-	var cleanDay string
-	for {
-		decreaseDay--
-		weekDay := time.Now().AddDate(0, 0, decreaseDay).Weekday().String()
-		if weekDay != "Saturday" && weekDay != "Sunday" {
-			day++
-		}
-		if day == CloseNum {
-			cleanDay = time.Now().AddDate(0, 0, decreaseDay).Format("2006-01-02")
-			break
-		}
-	}
-	the_time, _ := time.ParseInLocation(qu.Date_Short_Layout, cleanDay, time.Local)
-	unix_time := the_time.Unix() //凌晨时间戳
-	query := map[string]interface{}{
-		"i_state": 0,
-		"l_complete": map[string]interface{}{
-			"$lt": unix_time + 86400,
-		},
-		"s_type": "1",
-		// "s_type": map[string]interface{}{
-		// 	"$ne": "7",
-		// },
-	}
-	logger.Debug("query:", query)
-	set := map[string]interface{}{
-		"$set": map[string]interface{}{
-			"i_state": 6,
-		},
-	}
-	MgoE.Update("task", query, set, false, true)
-	logger.Debug("---清理未更新任务完毕---")
-}
-
-//1、统计spider_downloadrate前一天列表页采集异常爬虫
-func GetSpiderDownloadRateData() {
-	defer qu.Catch()
-	logger.Debug("---开始统计spider_downloadrate异常信息---")
-	sess := MgoS.GetMgoConn()
-	defer MgoS.DestoryMongoConn(sess)
-	ch := make(chan bool, 5)
-	wg := &sync.WaitGroup{}
-	lock := &sync.Mutex{}
-	date := qu.FormatDateByInt64(&StartTime, qu.Date_Short_Layout)
-	query := map[string]interface{}{
-		"date": date,
-	}
-	it := sess.DB("spider").C("spider_downloadrate").Find(&query).Iter()
-	n := 0
-	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
-		ch <- true
-		wg.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-ch
-				wg.Done()
-			}()
-			stype := -1
-			//1、统计采集频率异常信息
-			oh_percent := qu.IntAll(tmp["oh_percent"])
-			event := qu.IntAll(tmp["event"])
-			if oh_percent > 0 && event != 7410 {
-				stype = 8
-			}
-
-			//2、统计列表页异常(统计zero占总下载次数的百分比超过80%的)
-			alltimes := qu.IntAll(tmp["alltimes"])
-			zero := qu.IntAll(tmp["zero"])
-			percent := 0 //记录百分比
-			if zero > 0 {
-				tmpPercent := float64(zero) / float64(alltimes)
-				tmpPercent, _ = strconv.ParseFloat(fmt.Sprintf("%.2f", tmpPercent), 64)
-				percent = int(tmpPercent * float64(100))
-				if percent >= 80 { //占比超过80%
-					stype = 7
-				}
-			}
-			if stype != -1 { //出现异常
-				code := qu.ObjToString(tmp["spidercode"])
-				site := qu.ObjToString(tmp["site"])
-				channel := qu.ObjToString(tmp["channel"])
-				t := &Task{
-					Code:    code,
-					Site:    site,
-					Channel: channel,
-					ErrInfo: map[string]map[string]interface{}{},
-					State:   1,
-				}
-				if stype == 8 {
-					t.ErrType = "8"
-					t.ErrInfo = map[string]map[string]interface{}{
-						"8": map[string]interface{}{
-							"num": oh_percent,
-						},
-					}
-					t.Description = "采集频率异常:\n 列表页共采集" + fmt.Sprint(alltimes) + "轮,其中有" + fmt.Sprint(oh_percent) + "轮数据全采\n"
-				} else if stype == 7 {
-					t.ErrType = "7"
-					t.ErrInfo = map[string]map[string]interface{}{
-						"7": map[string]interface{}{
-							"num": percent,
-						},
-					}
-					t.Description = "列表页异常:\n 列表页采集无信息次数占比" + fmt.Sprint(percent) + "%\n"
-				}
-				lock.Lock()
-				TaskMap[code] = t
-				lock.Unlock()
-			}
-		}(tmp)
-		if n%100 == 0 {
-			qu.Debug("current:", n)
-		}
-		tmp = map[string]interface{}{}
-	}
-	wg.Wait()
-	logger.Debug("---统计spider_downloadrate异常信息完成---")
-}
-
-//2、状态码404
-func GetStatusCodeErrorData() {
-	defer qu.Catch()
-	logger.Debug("---开始统计栏目地址404数据---")
-	sess := MgoS.GetMgoConn()
-	defer MgoS.DestoryMongoConn(sess)
-	ch := make(chan bool, 5)
-	wg := &sync.WaitGroup{}
-	lock := &sync.Mutex{}
-	field := map[string]interface{}{
-		"url":     1,
-		"code":    1,
-		"site":    1,
-		"channel": 1,
-	}
-	query := map[string]interface{}{
-		"comeintime": map[string]interface{}{
-			"$gte": StartTime,
-			"$lte": EndTime,
-		},
-		"statuscode": 404,
-	}
-	it := sess.DB("spider").C("spider_sitecheck").Find(&query).Select(&field).Iter()
-	count, _ := sess.DB("spider").C("spider_sitecheck").Find(&query).Count()
-	logger.Debug("共有404地址", count, "条")
-	n := 0
-	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
-		ch <- true
-		wg.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-ch
-				wg.Done()
-			}()
-			code := qu.ObjToString(tmp["code"])
-			one, _ := MgoE.FindOneByField("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"state": 1})
-			state := qu.IntAll((*one)["state"])
-			if state == 4 || state > 6 {
-				return
-			}
-			//判断3天内是否有采集数据,有则不建404任务
-			stime, etime := GetTime(-3), GetTime(0)
-			q := map[string]interface{}{
-				"spidercode": code,
-				"l_np_publishtime": map[string]interface{}{
-					"$gte": stime,
-					"$lte": etime,
-				},
-			}
-			if MgoS.Count("data_bak", q) > 0 { //有采集数据,不认为是404
-				return
-			}
-			href := qu.ObjToString(tmp["url"])
-			site := qu.ObjToString(tmp["site"])
-			channel := qu.ObjToString(tmp["channel"])
-			lock.Lock()
-			if t := TaskMap[code]; t != nil {
-				t.ErrInfo["6"] = map[string]interface{}{ //ErrInfo新增下载异常信息
-					"num":   404,
-					"hrefs": []string{href},
-				}
-				t.Description += "网站监测:404\n" + href + "\n"
-				t.State = 1
-			} else {
-				t := &Task{
-					Code:        code,
-					Site:        site,
-					Channel:     channel,
-					ErrType:     "6",
-					ErrInfo:     map[string]map[string]interface{}{},
-					Description: "网站监测:404\n" + href + "\n",
-					State:       1,
-				}
-				t.ErrInfo = map[string]map[string]interface{}{
-					"6": map[string]interface{}{
-						"num":   404,
-						"hrefs": []string{href},
-					},
-				}
-				TaskMap[code] = t
-			}
-			lock.Unlock()
-		}(tmp)
-		if n%100 == 0 {
-			qu.Debug("current:", n)
-		}
-		tmp = map[string]interface{}{}
-	}
-	wg.Wait()
-	logger.Debug("---统计栏目地址404数据完成---")
-}
-
-//3、统计三级页下载失败数据
-/*
-	先统计下载失败信息再更新下载失败信息状态(ResetDataState)使其可重新下载,这样不影响统计
-	但是任务已经就绪,若下载失败信息重新下载成功,则使任务不太准备
-	若先重置状态再统计,会使任务统计时缺少,无法正常监控
-*/
-func GetDownloadFailedData() {
-	defer qu.Catch()
-	logger.Debug("---开始统计下载失败信息---")
-	sess := MgoS.GetMgoConn()
-	defer MgoS.DestoryMongoConn(sess)
-	ch := make(chan bool, 5)
-	wg := &sync.WaitGroup{}
-	lock := &sync.Mutex{}
-	field := map[string]interface{}{
-		"spidercode": 1,
-		"href":       1,
-		"site":       1,
-		"channel":    1,
-	}
-	query := map[string]interface{}{
-		"comeintime": map[string]interface{}{
-			"$gte": StartTime,
-			"$lte": EndTime,
-		},
-		"state": -1,
-	}
-	it := sess.DB("spider").C("spider_highlistdata").Find(&query).Select(&field).Iter()
-	count, _ := sess.DB("spider").C("spider_highlistdata").Find(&query).Count()
-	logger.Debug("共有下载失败数据", count, "条")
-	n := 0
-	//arr := [][]map[string]interface{}{}
-	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
-		ch <- true
-		wg.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-ch
-				wg.Done()
-			}()
-			code := qu.ObjToString(tmp["spidercode"])
-			href := qu.ObjToString(tmp["href"])
-			site := qu.ObjToString(tmp["site"])
-			channel := qu.ObjToString(tmp["channel"])
-			lock.Lock()
-			if t := TaskMap[code]; t != nil {
-				if info := t.ErrInfo["5"]; info != nil {
-					num := qu.IntAll(info["num"])
-					num++
-					info["num"] = num
-					hrefs := info["hrefs"].([]string)
-					if len(hrefs) < 3 {
-						hrefs = append(hrefs, href)
-						info["hrefs"] = hrefs
-						t.Description += href + "\n"
-					}
-					if num >= 10 {
-						t.State = 1
-					}
-				} else {
-					t.ErrInfo["5"] = map[string]interface{}{ //ErrInfo新增下载异常信息
-						"num":   1,
-						"hrefs": []string{href},
-					}
-					t.Description += "下载异常:\n" + href + "\n"
-				}
-			} else {
-				t := &Task{
-					Code:        code,
-					Site:        site,
-					Channel:     channel,
-					ErrType:     "5",
-					ErrInfo:     map[string]map[string]interface{}{},
-					Description: "下载异常:\n" + href + "\n",
-					State:       0,
-				}
-				t.ErrInfo = map[string]map[string]interface{}{
-					"5": map[string]interface{}{
-						"num":   1,
-						"hrefs": []string{href},
-					},
-				}
-				TaskMap[code] = t
-			}
-
-			//更新state状态重新下载
-			// update := []map[string]interface{}{}
-			// update = append(update, map[string]interface{}{"_id": tmp["_id"]})
-			// update = append(update, map[string]interface{}{"$set": map[string]interface{}{"state": 0, "times": 0}})
-			// arr = append(arr, update)
-			// if len(arr) > 500 {
-			// 	tmps := arr
-			// 	MgoS.UpdateBulk("spider_highlistdata", tmps...)
-			// 	arr = [][]map[string]interface{}{}
-			// }
-			lock.Unlock()
-		}(tmp)
-		if n%100 == 0 {
-			qu.Debug("current:", n)
-		}
-		tmp = map[string]interface{}{}
-	}
-	wg.Wait()
-	// lock.Lock()
-	// if len(arr) > 0 {
-	// 	MgoS.UpdateBulk("spider_highlistdata", arr...)
-	// 	arr = [][]map[string]interface{}{}
-	// }
-	// lock.Unlock()
-	logger.Debug("---统计下载失败信息完成---")
-}
-
-//4、统计重采失败数据
-func GetRegatherFailedData() {
-	defer qu.Catch()
-	logger.Debug("---开始统计重采失败信息---")
-	sess := MgoS.GetMgoConn()
-	defer MgoS.DestoryMongoConn(sess)
-	ch := make(chan bool, 5)
-	wg := &sync.WaitGroup{}
-	lock := &sync.Mutex{}
-	field := map[string]interface{}{
-		"spidercode": 1,
-		"href":       1,
-		"site":       1,
-		"channel":    1,
-	}
-	query := map[string]interface{}{
-		"state": map[string]interface{}{
-			"$lte": 1,
-		},
-		"from": "lua",
-		"comeintime": map[string]interface{}{
-			"$gte": StartTime,
-			"$lte": EndTime,
-		},
-	}
-	it := sess.DB("spider").C("regatherdata").Find(&query).Select(&field).Iter()
-	count, _ := sess.DB("spider").C("regatherdata").Find(&query).Count()
-	logger.Debug("共有重采失败数据", count, "条")
-	n := 0
-	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
-		ch <- true
-		wg.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-ch
-				wg.Done()
-			}()
-			code := qu.ObjToString(tmp["spidercode"])
-			href := qu.ObjToString(tmp["href"])
-			site := qu.ObjToString(tmp["site"])
-			channel := qu.ObjToString(tmp["channel"])
-			lock.Lock()
-			if t := TaskMap[code]; t != nil {
-				if info := t.ErrInfo["4"]; info != nil {
-					num := qu.IntAll(info["num"])
-					num++
-					info["num"] = num
-					hrefs := info["hrefs"].([]string)
-					if len(hrefs) < 3 {
-						hrefs = append(hrefs, href)
-						info["hrefs"] = hrefs
-						t.Description += href + "\n"
-					}
-					if num >= 10 {
-						t.State = 1
-					}
-				} else {
-					t.ErrInfo["4"] = map[string]interface{}{ //ErrInfo新增下载异常信息
-						"num":   1,
-						"hrefs": []string{href},
-					}
-					t.Description += "运行报错:\n" + href + "\n"
-				}
-			} else {
-				t := &Task{
-					Code:        code,
-					Site:        site,
-					Channel:     channel,
-					ErrType:     "4",
-					ErrInfo:     map[string]map[string]interface{}{},
-					Description: "运行报错:\n" + href + "\n",
-					State:       0,
-				}
-				t.ErrInfo = map[string]map[string]interface{}{
-					"4": map[string]interface{}{
-						"num":   1,
-						"hrefs": []string{href},
-					},
-				}
-				TaskMap[code] = t
-			}
-			lock.Unlock()
-		}(tmp)
-		if n%100 == 0 {
-			qu.Debug("current:", n)
-		}
-		tmp = map[string]interface{}{}
-	}
-	wg.Wait()
-	// for _, task := range TaskMap {
-	// 	qu.Debug("code:", task.Code)
-	// 	qu.Debug("site:", task.Site)
-	// 	qu.Debug("channel:", task.Channel)
-	// 	qu.Debug("errtype:", task.ErrType)
-	// 	qu.Debug("description:", task.Description)
-	// 	qu.Debug("info:", task.ErrInfo)
-	// 	qu.Debug("-------------------------------------------")
-	// 	tmap := map[string]interface{}{}
-	// 	ab, _ := json.Marshal(&task)
-	// 	json.Unmarshal(ab, &tmap)
-	// 	MgoE.Save("save_aa", tmap)
-	// }
-	logger.Debug("---统计重采失败信息完成---")
-}
-
-//5、统计detail、title、publishtime异常数据
-func GetDTPErrData() {
-	defer qu.Catch()
-	logger.Debug("---开始统计信息异常数据---")
-	sess := MgoS.GetMgoConn()
-	defer MgoS.DestoryMongoConn(sess)
-	ch := make(chan bool, 5)
-	wg := &sync.WaitGroup{}
-	lock := &sync.Mutex{}
-	field := map[string]interface{}{
-		"code":    1,
-		"href":    1,
-		"site":    1,
-		"channel": 1,
-		"field":   1,
-		"info":    1,
-	}
-	query := map[string]interface{}{
-		"comeintime": map[string]interface{}{
-			"$gte": StartTime,
-			"$lte": EndTime,
-		},
-		"level": 2, //2:error数据 1:warn数据
-	}
-	it := sess.DB("spider").C("spider_warn").Find(&query).Select(&field).Iter()
-	count, _ := sess.DB("spider").C("spider_warn").Find(&query).Count()
-	logger.Debug("共有信息异常数据", count, "条")
-	n := 0
-	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
-		ch <- true
-		wg.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-ch
-				wg.Done()
-			}()
-			errnum := "2" //detail、 title异常
-			destmp := "正文标题异常:\n"
-			field := qu.ObjToString(tmp["field"])
-			info := qu.ObjToString(tmp["info"])
-			if field == "publishtime" { //发布时间异常
-				if info == "Publishtime Is Too Late" { //发布时间超前的不建任务
-					return
-				}
-				errnum = "3"
-				destmp = "发布时间异常:\n"
-			}
-			code := qu.ObjToString(tmp["code"])
-			href := qu.ObjToString(tmp["href"])
-			site := qu.ObjToString(tmp["site"])
-			channel := qu.ObjToString(tmp["channel"])
-
-			lock.Lock()
-			if t := TaskMap[code]; t != nil {
-				if info := t.ErrInfo[errnum]; info != nil {
-					num := qu.IntAll(info["num"])
-					num++
-					info["num"] = num
-					hrefs := info["hrefs"].([]string)
-					if len(hrefs) < 3 {
-						hrefs = append(hrefs, href)
-						info["hrefs"] = hrefs
-						t.Description += href + "\n"
-					}
-					if num >= 10 {
-						t.State = 1
-					}
-				} else {
-					t.ErrInfo[errnum] = map[string]interface{}{
-						"num":   1,
-						"hrefs": []string{href},
-					}
-					t.Description += destmp + href + "\n"
-				}
-			} else {
-				t := &Task{
-					Code:        code,
-					Site:        site,
-					Channel:     channel,
-					ErrType:     errnum,
-					ErrInfo:     map[string]map[string]interface{}{},
-					Description: destmp + href + "\n",
-					State:       0,
-				}
-				t.ErrInfo = map[string]map[string]interface{}{
-					errnum: map[string]interface{}{
-						"num":   1,
-						"hrefs": []string{href},
-					},
-				}
-				TaskMap[code] = t
-			}
-			lock.Unlock()
-		}(tmp)
-		if n%100 == 0 {
-			qu.Debug("current:", n)
-		}
-		tmp = map[string]interface{}{}
-	}
-	wg.Wait()
-	logger.Debug("---统计信息异常数据完成---")
-}
-
-//6、统计下载量异常数据
-func GetDownloadNumErrData() {
-	defer qu.Catch()
-	logger.Debug("---开始统计下载量异常数据---")
-	sess := MgoS.GetMgoConn()
-	defer MgoS.DestoryMongoConn(sess)
-	ch := make(chan bool, 5)
-	wg := &sync.WaitGroup{}
-	lock := &sync.Mutex{}
-	field := map[string]interface{}{
-		"downloadNum":     1,
-		"code":            1,
-		"averageDownload": 1,
-		"site":            1,
-		"channel":         1,
-	}
-	query := map[string]interface{}{
-		"isok": false,
-	}
-	it := sess.DB("spider").C("spider_download").Find(&query).Select(&field).Iter()
-	count, _ := sess.DB("spider").C("spider_download").Find(&query).Count()
-	logger.Debug("共有下载量异常数据", count, "条")
-	n := 0
-	arr := [][]map[string]interface{}{}
-	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
-		ch <- true
-		wg.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-ch
-				wg.Done()
-			}()
-			code := qu.ObjToString(tmp["code"])
-			site := qu.ObjToString(tmp["site"])
-			channel := qu.ObjToString(tmp["channel"])
-			average := qu.IntAll(tmp["averageDownload"])
-			date := "" //日期
-			dnum := 0  //下载量
-			for d, n := range tmp["downloadNum"].(map[string]interface{}) {
-				date = d
-				dnum = qu.IntAll(n)
-			}
-			lock.Lock()
-			if t := TaskMap[code]; t != nil {
-				t.ErrInfo["1"] = map[string]interface{}{ //ErrInfo新增下载异常信息
-					"num":     dnum,
-					"date":    date,
-					"average": average,
-				}
-				t.Description += "下载量异常:\n" + date + ":" + fmt.Sprint(dnum) + "\n"
-			} else {
-				t := &Task{
-					Code:        code,
-					Site:        site,
-					Channel:     channel,
-					ErrType:     "1",
-					ErrInfo:     map[string]map[string]interface{}{},
-					Description: "下载量异常:\n" + date + ":" + fmt.Sprint(dnum) + "\n",
-					State:       0,
-				}
-				t.ErrInfo = map[string]map[string]interface{}{
-					"1": map[string]interface{}{
-						"num":     dnum,
-						"date":    date,
-						"average": average,
-					},
-				}
-				TaskMap[code] = t
-			}
-			//更新isok
-			update := []map[string]interface{}{}
-			update = append(update, map[string]interface{}{"_id": tmp["_id"]})
-			update = append(update, map[string]interface{}{"$set": map[string]interface{}{"isok": true}})
-			arr = append(arr, update)
-			if len(arr) > 500 {
-				tmps := arr
-				MgoS.UpdateBulk("spider_download", tmps...)
-				arr = [][]map[string]interface{}{}
-			}
-			lock.Unlock()
-		}(tmp)
-		if n%100 == 0 {
-			qu.Debug("current:", n)
-		}
-		tmp = map[string]interface{}{}
-	}
-	wg.Wait()
-	lock.Lock()
-	if len(arr) > 0 {
-		MgoS.UpdateBulk("spider_download", arr...)
-		arr = [][]map[string]interface{}{}
-	}
-	lock.Unlock()
-	logger.Debug("---统计下载量异常数据完成---")
-}
-
-//保存统计信息
-func SaveResult() {
-	defer qu.Catch()
-	logger.Debug("---开始保存信息---")
-	wg := &sync.WaitGroup{}
-	lock := &sync.Mutex{}
-	ch := make(chan bool, 10)
-	savearr := []map[string]interface{}{}
-	for _, task := range TaskMap {
-		wg.Add(1)
-		ch <- true
-		go func(t *Task) {
-			defer func() {
-				<-ch
-				wg.Done()
-			}()
-			delYearMinCode := false
-			if errInfo := t.ErrInfo; errInfo != nil {
-				//爬虫任务为下载异常、运行异常、404、时间异常、数据异常任务时,不再建该爬虫的抽查任务
-				if len(errInfo) >= 2 || (len(errInfo) == 1 && errInfo["1"] == nil) { //不是数量异常任务
-					delYearMinCode = true
-				}
-			}
-			lock.Lock()
-			has := YearMinCodeMap[t.Code]
-			lock.Unlock()
-			if delYearMinCode {
-				lock.Lock()
-				delete(YearMinCodeMap, t.Code)
-				lock.Unlock()
-				go MgoE.Update("luayearmincode", map[string]interface{}{"code": t.Code}, map[string]interface{}{"$set": map[string]interface{}{"send": true}}, false, false)
-			} else if has { //luayearmincode中爬虫任务删除
-				return
-			}
-
-			result := map[string]interface{}{}
-			result["code"] = t.Code
-			result["site"] = t.Site
-			result["channel"] = t.Channel
-			result["errtype"] = t.ErrType
-			result["errinfo"] = t.ErrInfo
-			result["description"] = t.Description
-			result["comeintime"] = time.Now().Unix()
-			result["state"] = t.State
-			//result["updatetime"] = time.Now().Unix()
-			lua, _ := MgoE.FindOne("luaconfig", map[string]interface{}{"code": t.Code})
-			if lua != nil && len(*lua) > 0 {
-				result["modifyid"] = (*lua)["createuserid"]
-				result["modify"] = (*lua)["createuser"]
-				result["event"] = (*lua)["event"]
-			}
-			lock.Lock()
-			if len(result) > 0 {
-				savearr = append(savearr, result)
-			}
-			if len(savearr) > 500 {
-				tmps := savearr
-				MgoE.SaveBulk("luataskinfo", tmps...)
-				savearr = []map[string]interface{}{}
-			}
-			lock.Unlock()
-		}(task)
-	}
-	wg.Wait()
-	lock.Lock()
-	if len(savearr) > 0 {
-		MgoE.SaveBulk("luataskinfo", savearr...)
-		savearr = []map[string]interface{}{}
-	}
-	lock.Unlock()
-	TaskMap = map[string]*Task{} //重置
-	logger.Debug("---保存信息完成---")
-}
-
-//创建任务
-func CreateLuaTask() {
-	defer qu.Catch()
-	logger.Debug("---开始创建任务---")
-	sess := MgoE.GetMgoConn()
-	defer MgoE.DestoryMongoConn(sess)
-	ch := make(chan bool, 1)
-	wg := &sync.WaitGroup{}
-	field := map[string]interface{}{
-		"comeintime": 0,
-		//"updatetime": 0,
-	}
-	query := map[string]interface{}{
-		"comeintime": map[string]interface{}{
-			"$gte": GetTime(0),
-		},
-	}
-	it := sess.DB("editor").C("luataskinfo").Find(&query).Select(&field).Iter()
-	count, _ := sess.DB("editor").C("luataskinfo").Find(&query).Count()
-	logger.Debug("共有异常爬虫数据量", count, "条")
-	n := 0
-	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
-		ch <- true
-		wg.Add(1)
-		func(tmp map[string]interface{}) { //目前不用多线程
-			defer func() {
-				<-ch
-				wg.Done()
-			}()
-			id := mgo.BsonIdToSId(tmp["_id"])
-			code := qu.ObjToString(tmp["code"])
-			site := qu.ObjToString(tmp["site"])
-			channel := qu.ObjToString(tmp["channel"])
-			description := qu.ObjToString(tmp["description"])
-			errtype := qu.ObjToString(tmp["errtype"])
-			errinfo := tmp["errinfo"].(map[string]interface{})
-			modifyid := qu.ObjToString(tmp["modifyid"])
-			modify := qu.ObjToString(tmp["modify"])
-			event := qu.IntAll(tmp["event"])
-			state := qu.IntAll(tmp["state"])
-			//初始化一些任务的变量
-			n_imin := 0   //最小下载量
-			n_itimes := 0 //任务出现特别紧急的次数
-			if state == 1 {
-				n_itimes = 1
-			}
-			n_idn := 0        //下载量
-			n_sdt := ""       //下载量对应的日期
-			n_surgency := "1" //紧急程度
-			//
-			dnerr := errinfo["1"]
-			if errtype == "1" && dnerr != nil { //只有任务类型是数据量异常时,才记录数据量信息
-				info := errinfo["1"].(map[string]interface{})
-				n_imin = qu.IntAll(info["average"])
-				n_idn = qu.IntAll(info["num"])
-				n_sdt = qu.ObjToString(info["date"])
-			}
-			if errtype == "8" || errtype == "7" || errtype == "6" {
-				n_surgency = "4"
-			}
-			query := map[string]interface{}{
-				"s_code": code,
-				"i_state": map[string]interface{}{
-					"$in": []int{0, 1, 2, 3, 5},
-				},
-			}
-			list, _ := MgoE.Find("task", query, nil, nil, false, -1, -1)
-			if list != nil && len(*list) > 0 { //已有任务
-				if len(*list) > 1 {
-					logger.Error("Code:", code, "任务异常")
-					MgoE.Save("luacreatetaskerr", map[string]interface{}{
-						"code":       code,
-						"comeintime": time.Now().Unix(),
-						"tasknum":    len(*list),
-					})
-					return
-				}
-				task := (*list)[0]
-				o_istate := qu.IntAll(task["i_state"])              //已有任务的状态
-				o_stype := qu.ObjToString(task["s_type"])           //已有任务的类型
-				o_sdescript := qu.ObjToString(task["s_descript"])   //已有任务的描述
-				o_addinfoid, _ := task["addinfoid"].([]interface{}) //luataskinfo信息
-				o_lcomplete := qu.Int64All(task["l_complete"])      //已有任务的最迟完成时间
-				o_surgency := qu.ObjToString(task["s_urgency"])     //已有任务的紧急度
-				o_iurgency, _ := strconv.Atoi(o_surgency)           //已有任务的紧急度int类型
-				o_itimes := qu.IntAll(task["i_times"])              //已有任务出现的次数
-				//
-				o_addinfoid = append(o_addinfoid, id)                                                                          //追加addinfoid信息
-				o_sdescript += time.Now().Format(qu.Date_Short_Layout) + "追加描述:------------------------------\n" + description //追加描述
-				set := map[string]interface{}{}
-				//MgoE.Update("task", q, s, false, false)
-				if state == 1 { //新任务为待处理
-					if o_istate < 2 {
-						if errtype > o_stype { //历史任务是待确认、待处理状态且任务类型等级低于新建任务,任务类型替换为新任务类型
-							o_stype = errtype
-						}
-						o_surgency = n_surgency //更新紧急度
-						o_itimes++
-						set = map[string]interface{}{
-							"addinfoid":  o_addinfoid,
-							"s_descript": o_sdescript,
-							/// "i_min":          n_imin,
-							// "i_num":          n_idn,
-							// "s_downloadtime": n_sdt,
-							"i_state":      state,
-							"l_complete":   CompleteTime(o_surgency),
-							"s_urgency":    o_surgency,
-							"s_type":       o_stype,
-							"i_times":      o_itimes,
-							"l_updatetime": time.Now().Unix(),
-						}
-					} else { //历史任务类型为未通过或待审核,更新信息
-						set = map[string]interface{}{
-							"addinfoid":    o_addinfoid,
-							"s_descript":   o_sdescript,
-							"l_updatetime": time.Now().Unix(),
-						}
-					}
-				} else { //新任务为待确认
-					if o_istate == 0 { //历史任务为待确认
-						if o_stype == "1" { //历史任务为数量异常待确认
-							if errtype == "1" { //新任务为数量异常待确认,按紧急程度递增,次数递增
-								o_iurgency++         //紧急度加一级
-								if o_iurgency >= 4 { //出现特别紧急的状态,记录次数itimes
-									o_itimes++
-									o_iurgency = 4
-								}
-								o_surgency = fmt.Sprint(o_iurgency)
-								o_lcomplete = CompleteTime(o_surgency)
-								if o_itimes >= 5 { //特别紧急的次数出现5次,自动创建待处理的任务(排除有待审核任务的可能)
-									state = 1
-								}
-								set = map[string]interface{}{
-									"addinfoid":      o_addinfoid,
-									"s_descript":     o_sdescript,
-									"i_min":          n_imin,
-									"i_num":          n_idn,
-									"s_downloadtime": n_sdt,
-									"i_state":        state,
-									"l_complete":     o_lcomplete,
-									"s_urgency":      o_surgency,
-									"s_type":         errtype,
-									"i_times":        o_itimes,
-									"l_updatetime":   time.Now().Unix(),
-								}
-							} else { //新任务为其他异常类型待确认,紧急程度:紧急;
-								if o_iurgency < 4 { //数量异常,特别紧急以下
-									o_surgency = "1"
-								} else {
-									o_surgency = "2"
-								}
-								set = map[string]interface{}{
-									"addinfoid":      o_addinfoid,
-									"s_descript":     o_sdescript,
-									"i_min":          n_imin,
-									"i_num":          n_idn,
-									"s_downloadtime": n_sdt,
-									"i_state":        state,
-									"l_complete":     CompleteTime(o_surgency),
-									"s_urgency":      o_surgency,
-									"s_type":         errtype,
-									"l_updatetime":   time.Now().Unix(),
-								}
-							}
-						} else { //其他任务类型待确认,历史任务紧急程度+1,次数+1,任务类型更新为异常等级高者且连续4天变为待处理
-							if errtype > o_stype {
-								o_stype = errtype
-							}
-							o_iurgency++         //紧急度加一级
-							if o_iurgency >= 4 { //出现特别紧急的状态,记录次数itimes
-								o_itimes++
-								o_iurgency = 4
-								state = 1 //特别紧急,任务变为待处理
-							}
-							o_surgency = fmt.Sprint(o_iurgency)
-							set = map[string]interface{}{
-								"addinfoid":      o_addinfoid,
-								"s_descript":     o_sdescript,
-								"i_min":          n_imin,
-								"i_num":          n_idn,
-								"s_downloadtime": n_sdt,
-								"i_state":        state,
-								"l_complete":     CompleteTime(o_surgency),
-								"s_urgency":      o_surgency,
-								"s_type":         o_stype,
-								"i_times":        o_itimes,
-								"l_updatetime":   time.Now().Unix(),
-							}
-						}
-					} else { //历史任务为待处理以上,只追加描述
-						set = map[string]interface{}{
-							"addinfoid":      o_addinfoid,
-							"s_descript":     o_sdescript,
-							"i_min":          n_imin,
-							"i_num":          n_idn,
-							"s_downloadtime": n_sdt,
-							"l_updatetime":   time.Now().Unix(),
-						}
-					}
-				}
-				MgoE.Update("task", map[string]interface{}{"_id": task["_id"]}, map[string]interface{}{"$set": set}, false, false)
-			} else {
-				SaveTask(code, site, channel, modifyid, modify, description, n_surgency, n_sdt, errtype, state, n_imin, n_idn, event, n_itimes, []string{id})
-			}
-		}(tmp)
-		if n%100 == 0 {
-			qu.Debug("current:", n)
-		}
-		tmp = map[string]interface{}{}
-	}
-	wg.Wait()
-	logger.Debug("---任务创建完成---")
-}
-
-func SaveTask(code, site, channel, modifyid, modify, description, urgency, downloadtime, errtype string, state, min, downloadnum, event, times int, addinfoid []string) {
-	defer qu.Catch()
-	result := map[string]interface{}{}
-	// if stateNum := UserTaskNum[modify]; stateNum == nil {
-	// 	tmp := map[string]int{fmt.Sprint(state): 1}
-	// 	UserTaskNum[modify] = tmp
-	// } else {
-	// 	stateNum[fmt.Sprint(state)]++
-	// }
-	// if state == 1 { //待处理任务,紧急程度定为特别紧急
-	// 	urgency = "4"
-	// }
-	result["s_code"] = code
-	result["s_site"] = site
-	result["s_channel"] = channel
-	result["s_modifyid"] = modifyid
-	result["s_modify"] = modify
-	result["s_descript"] = description
-	result["i_min"] = min
-	result["i_num"] = downloadnum //下载量
-	result["s_urgency"] = urgency
-	result["i_state"] = state
-	result["i_event"] = event
-	result["s_downloadtime"] = downloadtime //下载量对应的日期
-	result["l_comeintime"] = time.Now().Unix()
-	result["l_updatetime"] = time.Now().Unix()
-	result["l_complete"] = CompleteTime(urgency)
-	//result["s_date"] = time.Now().Format(qu.Date_Short_Layout) //任务创建字符串日期
-	result["i_times"] = times       //为了方便编辑器对次数的排序,记录当前的次数
-	result["s_type"] = errtype      //任务类型
-	result["addinfoid"] = addinfoid //信息id
-	result["s_source"] = "程序"
-	MgoE.Save("task", result)
-}
-
-func SaveUserCreateTaskNum() {
-	defer qu.Catch()
-	for user, sn := range UserTaskNum {
-		save := map[string]interface{}{}
-		save["user"] = user
-		save["comeintime"] = time.Now().Unix()
-		for s, n := range sn {
-			save[s] = n
-		}
-		MgoE.Save("luausertask", save)
-	}
-	UserTaskNum = map[string]map[string]int{}
-}
-
-//重置前一周内未下载成功的数据(一天3次未下成功的数据可以连续下一周)
-func ResetDataState() {
-	defer qu.Catch()
-	logger.Info("-----更新数据状态-----")
-	sess := MgoS.GetMgoConn()
-	defer MgoS.DestoryMongoConn(sess)
-	ch := make(chan bool, 3)
-	wg := &sync.WaitGroup{}
-	lock := &sync.Mutex{}
-	query := map[string]interface{}{
-		"comeintime": map[string]interface{}{
-			"$gte": GetTime(-DayNum),
-			"$lt":  GetTime(0),
-		},
-		"state": -1,
-	}
-	field := map[string]interface{}{
-		"_id": 1,
-	}
-	it := sess.DB("spider").C("spider_highlistdata").Find(&query).Select(&field).Iter()
-	count, _ := sess.DB("spider").C("spider_highlistdata").Find(&query).Count()
-	logger.Info("更新数据状态数量:", count)
-	n := 0
-	arr := [][]map[string]interface{}{}
-	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
-		ch <- true
-		wg.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-ch
-				wg.Done()
-			}()
-			update := []map[string]interface{}{}
-			update = append(update, map[string]interface{}{"_id": tmp["_id"]})
-			update = append(update, map[string]interface{}{"$set": map[string]interface{}{"times": 0, "state": 0}})
-			lock.Lock()
-			arr = append(arr, update)
-			if len(arr) > 500 {
-				tmps := arr
-				MgoS.UpdateBulk("spider_highlistdata", tmps...)
-				arr = [][]map[string]interface{}{}
-			}
-			lock.Unlock()
-		}(tmp)
-		tmp = map[string]interface{}{}
-	}
-	wg.Wait()
-	lock.Lock()
-	if len(arr) > 0 {
-		MgoS.UpdateBulk("spider_highlistdata", arr...)
-		arr = [][]map[string]interface{}{}
-	}
-	lock.Unlock()
-	logger.Info("-----更新数据状态完毕-----")
-}

+ 154 - 0
src/timetask/random.go

@@ -0,0 +1,154 @@
+package timetask
+
+import (
+	qu "qfw/util"
+	"strings"
+	"time"
+	"util"
+)
+
+type WarnInfo struct {
+	Fields   map[string]bool
+	MaxLevel int
+	Data     interface{}
+	Site     interface{}
+	Channel  interface{}
+	Title    interface{}
+	Info     interface{}
+	Code     interface{}
+	Href     interface{}
+}
+
+func GetSpiderWarnData() {
+	defer qu.Catch()
+	qu.Debug("准备spider_warn_err数据")
+	stime := util.GetTime(-1)
+	etime := util.GetTime(0)
+	if time.Now().Weekday().String() == "Monday" {
+		stime = util.GetTime(-3)
+	}
+	query := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$gte": stime,
+			"$lt":  etime,
+		},
+		"info": map[string]interface{}{
+			"$in": []string{"Field Value Contains Random Code", "Publishtime Is Too Early", "Publishtime Is Too Late", "Field Value Not Contains Chinese"},
+		},
+	}
+	tmp := map[string]*WarnInfo{}
+	list, _ := util.MgoS.Find("spider_warn", query, nil, nil, false, -1, -1)
+	qu.Debug("query:", query, len(*list))
+	for _, l := range *list {
+		href := qu.ObjToString(l["href"])
+		level := qu.IntAll(l["level"])
+		field := qu.ObjToString(l["field"])
+		if field == "publishtime" { //特殊处理publishtime字段的level(保存服务中publishtime异常数据入bidding库,level不能为2)
+			level = 1
+		}
+		if warnInfo := tmp[href]; warnInfo == nil {
+			warnInfo = &WarnInfo{
+				Fields:   map[string]bool{field: true},
+				MaxLevel: level,
+				Data:     l["data"],
+				Site:     l["site"],
+				Channel:  l["channel"],
+				Title:    l["title"],
+				Info:     l["info"],
+				Code:     l["code"],
+				Href:     href,
+			}
+			tmp[href] = warnInfo
+		} else {
+			warnInfo.Fields[field] = true
+			if warnInfo.MaxLevel < level {
+				warnInfo.MaxLevel = level
+			}
+		}
+	}
+	for _, wi := range tmp {
+		fields := []string{}
+		for f := range wi.Fields {
+			fields = append(fields, f)
+		}
+		util.MgoS.Save("spider_warn_err", map[string]interface{}{
+			"field":      strings.Join(fields, ","),
+			"level":      wi.MaxLevel,
+			"site":       wi.Site,
+			"channel":    wi.Channel,
+			"title":      wi.Title,
+			"comeintime": time.Now().Unix(),
+			"info":       wi.Info,
+			"code":       wi.Code,
+			"href":       wi.Href,
+			"data":       wi.Data,
+			"ok":         false,
+		})
+	}
+}
+
+/*
+	每天定时推送含乱码数据
+*/
+// var (
+// 	RandomDataPushCron string
+// 	Gmail              *gm.GmailAuth
+// 	To                 string
+// )
+
+// type FileWrite struct {
+// 	Byte *bytes.Buffer
+// }
+
+// func (fw *FileWrite) Write(p []byte) (n int, err error) {
+// 	n, err = fw.Byte.Write(p)
+// 	return
+// }
+//PushRandomData 推送乱码数据
+// func PushRandomData() {
+// 	defer qu.Catch()
+// 	query := map[string]interface{}{
+// 		//"comeintime": map[string]interface{}{
+// 		//	"$gte": GetTime(-1),
+// 		//	"$lt":  GetTime(0),
+// 		//},
+// 		"info": map[string]interface{}{
+// 			"$in": []string{"Field Value Not Contains Chinese"},
+// 		},
+// 	}
+// 	list, _ := MgoS.Find("spider_warn", query, nil, nil, false, -1, -1)
+// 	if len(*list) > 0 {
+// 		file := xlsx.NewFile()
+// 		sheet, _ := file.AddSheet("乱码数据")
+// 		row := sheet.AddRow()
+// 		row.AddCell().SetValue("站点")
+// 		row.AddCell().SetValue("栏目")
+// 		row.AddCell().SetValue("爬虫")
+// 		row.AddCell().SetValue("字段")
+// 		row.AddCell().SetValue("异常等级")
+// 		row.AddCell().SetValue("标题")
+// 		row.AddCell().SetValue("链接")
+// 		for _, l := range *list {
+// 			textRow := sheet.AddRow()
+// 			textRow.AddCell().SetValue(qu.ObjToString(l["site"]))
+// 			textRow.AddCell().SetValue(qu.ObjToString(l["channel"]))
+// 			textRow.AddCell().SetValue(qu.ObjToString(l["code"]))
+// 			textRow.AddCell().SetValue(qu.ObjToString(l["field"]))
+// 			level := qu.IntAll(l["level"])
+// 			if level == 1 {
+// 				textRow.AddCell().SetValue("警告")
+// 			} else if level == 2 {
+// 				textRow.AddCell().SetValue("错误")
+// 			}
+// 			textRow.AddCell().SetValue(qu.ObjToString(l["title"]))
+// 			textRow.AddCell().SetValue(qu.ObjToString(l["href"]))
+// 		}
+// 		fw := &FileWrite{
+// 			Byte: &bytes.Buffer{},
+// 		}
+// 		file.Write(fw)
+// 		bt := fw.Byte.Bytes()
+// 		gm.GSendMail_Bq("jy@jianyu360.cn", To, "", "", "乱码数据统计", "", "统计报表.xlsx", bt, Gmail)
+// 	}
+
+// }

+ 11 - 10
src/summary.go → src/timetask/summary.go

@@ -1,9 +1,10 @@
-package main
+package timetask
 
 import (
 	qu "qfw/util"
 	"sync"
 	"time"
+	"util"
 )
 
 func SummaryCode() {
@@ -16,9 +17,9 @@ func SummaryCode() {
 	sm_ch1 := make(chan bool, 5)
 	sm_wg1 := &sync.WaitGroup{}
 	sm_lock1 := &sync.Mutex{}
-	sm_stime, sm_etime := GetTime(-1), GetTime(0)
-	sess_s := MgoS.GetMgoConn()
-	defer MgoS.DestoryMongoConn(sess_s)
+	sm_stime, sm_etime := util.GetTime(-1), util.GetTime(0)
+	sess_s := util.MgoS.GetMgoConn()
+	defer util.MgoS.DestoryMongoConn(sess_s)
 	timestr := qu.FormatDateByInt64(&sm_stime, qu.Date_Short_Layout)
 	query := map[string]interface{}{
 		"publishtime": map[string]interface{}{
@@ -101,8 +102,8 @@ func SummaryCode() {
 	sm_wg2 := &sync.WaitGroup{}
 	sm_lock2 := &sync.Mutex{}
 	arr := []map[string]interface{}{}
-	sess_e := MgoE.GetMgoConn()
-	defer MgoE.DestoryMongoConn(sess_e)
+	sess_e := util.MgoE.GetMgoConn()
+	defer util.MgoE.DestoryMongoConn(sess_e)
 	fe := map[string]interface{}{
 		"code":         1,
 		"event":        1,
@@ -143,7 +144,7 @@ func SummaryCode() {
 					"$lte": sm_etime,
 				},
 			}
-			data, _ := MgoS.FindOne("spider_sitecheck", q) //spider_sitecheck只记录了错误状态码爬虫
+			data, _ := util.MgoS.FindOne("spider_sitecheck", q) //spider_sitecheck只记录了错误状态码爬虫
 			if data != nil && len(*data) > 0 {
 				result["statuscode"] = qu.Int64All((*data)["statuscode"])
 			} else {
@@ -153,7 +154,7 @@ func SummaryCode() {
 			errinfo := map[string]interface{}{}
 			fnMap_lev1 := map[string]int{}
 			fnMap_lev2 := map[string]int{}
-			warnDatas, _ := MgoS.Find("spider_warn", q, nil, `{"field":1,"level":1}`, false, -1, -1)
+			warnDatas, _ := util.MgoS.Find("spider_warn", q, nil, `{"field":1,"level":1}`, false, -1, -1)
 			for _, d := range *warnDatas {
 				field := qu.ObjToString(d["field"])
 				level := qu.IntAll(d["level"])
@@ -188,7 +189,7 @@ func SummaryCode() {
 			arr = append(arr, result)
 			if len(arr) > 500 {
 				tmps := arr
-				MgoS.SaveBulk("spider_summaryinfo", tmps...)
+				util.MgoS.SaveBulk("spider_summaryinfo", tmps...)
 				arr = []map[string]interface{}{}
 			}
 			sm_lock2.Unlock()
@@ -200,7 +201,7 @@ func SummaryCode() {
 	}
 	sm_wg2.Wait()
 	if len(arr) > 0 {
-		MgoS.SaveBulk("spider_summaryinfo", arr...)
+		util.MgoS.SaveBulk("spider_summaryinfo", arr...)
 		arr = []map[string]interface{}{}
 	}
 	qu.Debug("上架爬虫信息汇总结束...")

+ 150 - 0
src/timetask/wxworkwarn.go

@@ -0,0 +1,150 @@
+package timetask
+
+import (
+	"bytes"
+	"fmt"
+	"math"
+	"net/http"
+	qu "qfw/util"
+	"strings"
+	"time"
+	"util"
+)
+
+var LuaUserMap map[string]map[string]string
+var LuaUserInfoMap map[string]*LuaUserInfo
+
+type UserTextInfo struct {
+	Username             string
+	FailedTaskCount      int
+	FailedTaskOverdueDay int
+	HeartErrCount        int
+	NoCollectDataDay     int
+}
+
+type LuaUserInfo struct {
+	Username string
+	Mobile   string
+}
+
+//
+var TitleContentModel = `
+	截止目前,爬虫共有未通过任务<font color=\"warning\">%d个</font>,异常心跳爬虫<font color=\"warning\">%d个</font>。请及时处理!\n
+`
+var UserContentModel = `
+     >人员:<font color=\"warning\">%s</font>
+     >未通过任务:<font color=\"warning\">%d个</font><font color=\"info\">(最早任务已逾期%d天)</font>
+     >异常心跳爬虫:<font color=\"warning\">%d个</font><font color=\"info\">(已有爬虫%d天未采集数据)</font>\n
+`
+var MarkdownModel = `{
+    "msgtype": "markdown",
+    "markdown": {
+        "content": "%s"
+    }
+}`
+var TextModel = `{
+    "msgtype": "text",
+    "text": {
+        "content": "",
+        "mentioned_mobile_list":[%s]
+    }
+}`
+
+func GetLuaUserInfo() {
+	LuaUserInfoMap = map[string]*LuaUserInfo{}
+	for eu, info := range LuaUserMap {
+		LuaUserInfoMap[eu] = &LuaUserInfo{
+			Username: info["username"],
+			Mobile:   info["mobile"],
+		}
+	}
+}
+
+// SendInfoToWxWork
+func SendInfoToWxWork() {
+	defer qu.Catch()
+	qu.Debug("企业微信发送提示信息")
+	failedTaskCount, heartCodeCount := 0, 0   //总未通过任务个数,总待处理心跳异常爬虫个数
+	userTextMap := map[string]*UserTextInfo{} //key:mobile
+	for eu, userInfo := range LuaUserInfoMap {
+		textInfo := &UserTextInfo{}
+		textInfo.Username = userInfo.Username
+		//1、未通过任务信息
+		list_task, _ := util.MgoE.Find("task",
+			map[string]interface{}{"s_modify": eu, "i_state": 5},
+			map[string]interface{}{"l_complete": 1},
+			map[string]interface{}{"l_complete": 1},
+			false, -1, -1)
+		taskLen := len(*list_task)
+		textInfo.FailedTaskCount = taskLen //个人未通过任务个数赋值
+		failedTaskCount += taskLen         //总未通过个数++
+		if taskLen > 0 {
+			complete := qu.Int64All((*list_task)[0]["l_complete"]) //未通过任务中最迟完成时间最早的任务
+			odDay := int(math.Floor(float64(time.Now().Unix()-complete) / float64(86400)))
+			textInfo.FailedTaskOverdueDay = odDay //个人未通过任务最早逾期天数赋值
+		}
+		//2、爬虫心跳信息
+		query := map[string]interface{}{
+			"modifyuser": eu,
+			"del":        false,
+			"$or": []interface{}{
+				map[string]interface{}{
+					"event": map[string]interface{}{
+						"$ne": 7500,
+					},
+					"list": map[string]interface{}{
+						"$lte": util.GetTime(0),
+					},
+				},
+				map[string]interface{}{
+					"event": 7500,
+					"list": map[string]interface{}{
+						"$lte": util.GetTime(-1),
+					},
+				},
+			},
+		}
+		qu.Debug("heart query:", query)
+		list_code, _ := util.MgoS.Find("spider_heart",
+			query,
+			map[string]interface{}{"list": 1},
+			map[string]interface{}{"list": 1},
+			false, -1, -1)
+		codeLen := len(*list_code)
+		textInfo.HeartErrCount = codeLen //个人异常心跳爬虫个数赋值
+		heartCodeCount += codeLen        //总异常心跳爬虫个数++
+		if codeLen > 0 {
+			listTime := qu.Int64All((*list_code)[0]["list"]) //未通过任务中最迟完成时间最早的任务
+			ncDay := int(math.Floor(float64(time.Now().Unix()-listTime) / float64(86400)))
+			textInfo.NoCollectDataDay = ncDay //个人未通过任务最早逾期天数赋值
+		}
+		userTextMap[userInfo.Mobile] = textInfo
+	}
+	//拼接content
+	resultContent := fmt.Sprintf(TitleContentModel, failedTaskCount, heartCodeCount)
+	mobileArr := []string{}
+	for mobile, t := range userTextMap {
+		mobileArr = append(mobileArr, "\""+mobile+"\"")
+		resultContent += fmt.Sprintf(UserContentModel, t.Username, t.FailedTaskCount, t.FailedTaskOverdueDay, t.HeartErrCount, t.NoCollectDataDay)
+	}
+	msg := fmt.Sprintf(MarkdownModel, resultContent)
+	qu.Debug("msg", msg)
+	toUserMsg := fmt.Sprintf(TextModel, strings.Join(mobileArr, ","))
+	qu.Debug("toUserMsg", toUserMsg)
+	resp1, err := http.Post(
+		"https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=97850772-88d0-4544-a2c3-6201aeddff9e",
+		"application/json",
+		bytes.NewBuffer([]byte(toUserMsg)),
+	)
+	if err != nil { //必须先检查error再关闭Body:失败时resp1为nil,直接defer Close会panic
+		fmt.Println("request error:", err)
+		return
+	}
+	defer resp1.Body.Close()
+	resp2, err := http.Post(
+		"https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=97850772-88d0-4544-a2c3-6201aeddff9e",
+		"application/json",
+		bytes.NewBuffer([]byte(msg)),
+	)
+	if err != nil {
+		fmt.Println("request error:", err)
+		return
+	}
+	defer resp2.Body.Close()
+
+}

+ 12 - 4
src/user.json

@@ -1,6 +1,14 @@
 {
-	"weixingyue": "weixingyue",
-	"jiaoyubo": "jiaoyubo",
-	"ssc": "ssc",
-	"lyf": "lyf"
+	"ssc":{
+		"username": "施顺才",
+		"mobile": "13523457747"
+	},
+	"lyf":{
+		"username": "刘一帆",
+		"mobile": "15896901897"
+	},
+	"jiaoyubo":{
+		"username": "焦宇波",
+		"mobile": "15516197109"
+	}
 }

+ 76 - 0
src/util/config.go

@@ -0,0 +1,76 @@
+package util
+
+import (
+	mgo "mongodb"
+	qu "qfw/util"
+	"regexp"
+)
+
+var (
+	Config             map[string]interface{}
+	MgoE               *mgo.MongodbSim //editor
+	MgoS               *mgo.MongodbSim //spider
+	MgoPy              *mgo.MongodbSim //py_spider
+	CommServers        map[string]interface{}
+	BidServers         map[string]interface{}
+	UploadEvents       map[int]string
+	RandomDataPushCron string
+	QyworkRemindCron   string
+	StartTaskCron      string      //任务开始
+	CodeSummaryCron    string      //每天统计爬虫信息
+	CloseNum           int         //关闭几天的任务
+	DayNum             int         //更新数据天数
+	CodeEventModel     map[int]int //节点对应的采集模式0:老模式;1:新模式
+	CodeEventWorking   map[int]int //节点对应的采集模式0:高性能模式;1:队列模式
+
+)
+var TimeReg = regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2}")
+
+func InitMgo() {
+	spider := Config["spider"].(map[string]interface{})
+	MgoS = &mgo.MongodbSim{
+		MongodbAddr: qu.ObjToString(spider["addr"]),
+		DbName:      qu.ObjToString(spider["db"]),
+		Size:        qu.IntAll(spider["size"]),
+	}
+	MgoS.InitPool()
+	pyspider := Config["py_spider"].(map[string]interface{})
+	MgoPy = &mgo.MongodbSim{
+		MongodbAddr: qu.ObjToString(pyspider["addr"]),
+		DbName:      qu.ObjToString(pyspider["db"]),
+		Size:        qu.IntAll(pyspider["size"]),
+	}
+	MgoPy.InitPool()
+	editor := Config["editor"].(map[string]interface{})
+	MgoE = &mgo.MongodbSim{
+		MongodbAddr: qu.ObjToString(editor["addr"]),
+		DbName:      qu.ObjToString(editor["db"]),
+		Size:        qu.IntAll(editor["size"]),
+	}
+	MgoE.InitPool()
+}
+
+func InitOther() {
+	StartTaskCron = qu.ObjToString(Config["startaskcron"])
+	CodeSummaryCron = qu.ObjToString(Config["codesummarycron"])
+	RandomDataPushCron = qu.ObjToString(Config["randomdatapushcron"])
+	QyworkRemindCron = qu.ObjToString(Config["qyworkremindcron"])
+	CloseNum = qu.IntAll(Config["closenum"])
+	DayNum = qu.IntAll(Config["daynum"])
+	MsgServers := Config["msgservers"].(map[string]interface{})
+	CommServers = MsgServers["comm"].(map[string]interface{})
+	BidServers = MsgServers["bid"].(map[string]interface{})
+	//event信息
+	eventsinfo := Config["eventsinfo"].(map[string]interface{})
+	UploadEvents = map[int]string{}
+	CodeEventModel = map[int]int{}
+	CodeEventWorking = map[int]int{}
+	for event, info := range eventsinfo {
+		eventTmp := qu.IntAll(event)
+		infoMap := info.(map[string]interface{})
+		UploadEvents[eventTmp] = qu.ObjToString(infoMap["server"])
+		CodeEventModel[eventTmp] = qu.IntAll(infoMap["model"])
+		CodeEventWorking[eventTmp] = qu.IntAll(infoMap["work"])
+	}
+	//qu.Debug(UploadEvents, CodeEventModel, CodeEventWorking)
+}

+ 136 - 0
src/util/msgservice.go

@@ -0,0 +1,136 @@
+package util
+
+import (
+	"encoding/json"
+	"errors"
+	mu "mfw/util"
+	qu "qfw/util"
+	"time"
+)
+
+//
+type DynamicIPMap struct {
+	Code        string
+	InvalidTime int64
+}
+
+var Msclient *mu.Client
+var MsclientBid *mu.Client
+var Alldownloader map[string]DynamicIPMap = make(map[string]DynamicIPMap)
+var AlldownloaderBid map[string]DynamicIPMap = make(map[string]DynamicIPMap)
+
+//
+func processevent(p *mu.Packet) {
+	defer mu.Catch()
+	var data []byte
+	switch p.Event {
+	case mu.SERVICE_DOWNLOAD_APPEND_NODE:
+		data = p.GetBusinessData()
+		//log.Println("获取动态地址:", len(data), string(data))
+		for i := 0; i < len(data)/8; i++ {
+			code := string(data[i*8 : (i+1)*8])
+			Alldownloader[code] = DynamicIPMap{
+				Code:        code,
+				InvalidTime: time.Now().Unix() + 60*10,
+			}
+		}
+	case mu.SERVICE_DOWNLOAD_DELETE_NODE:
+		data = p.GetBusinessData()
+		//log.Println("删除动态地址:", len(data), string(data))
+		for i := 0; i < len(data)/8; i++ {
+			code := string(data[i*8 : (i+1)*8])
+			delete(Alldownloader, code)
+		}
+	}
+}
+func processeventbid(p *mu.Packet) {
+	defer mu.Catch()
+	var data []byte
+	switch p.Event {
+	case mu.SERVICE_DOWNLOAD_APPEND_NODE:
+		data = p.GetBusinessData()
+		//log.Println("获取动态地址:", len(data), string(data))
+		for i := 0; i < len(data)/8; i++ {
+			code := string(data[i*8 : (i+1)*8])
+			AlldownloaderBid[code] = DynamicIPMap{
+				Code:        code,
+				InvalidTime: time.Now().Unix() + 60*10,
+			}
+		}
+	case mu.SERVICE_DOWNLOAD_DELETE_NODE:
+		data = p.GetBusinessData()
+		//log.Println("删除动态地址:", len(data), string(data))
+		for i := 0; i < len(data)/8; i++ {
+			code := string(data[i*8 : (i+1)*8])
+			delete(AlldownloaderBid, code)
+		}
+	}
+}
+
+//
+func gc4Alldownloader() {
+	n := time.Now().Unix()
+	for _, v := range Alldownloader {
+		if v.InvalidTime < n {
+			delete(Alldownloader, v.Code)
+		}
+	}
+	time.AfterFunc(1*time.Minute, gc4Alldownloader)
+}
+func gc4AlldownloaderBid() {
+	n := time.Now().Unix()
+	for _, v := range AlldownloaderBid {
+		if v.InvalidTime < n {
+			delete(AlldownloaderBid, v.Code)
+		}
+	}
+	time.AfterFunc(1*time.Minute, gc4AlldownloaderBid)
+}
+
+//初始化,启动消息客户端
+func InitMsgClient(serveraddr, serveraddrbid, name, namebid string) {
+	Msclient, _ = mu.NewClient(&mu.ClientConfig{ClientName: name,
+		MsgServerAddr:   serveraddr,
+		EventHandler:    processevent,
+		CanHandleEvents: []int{mu.SERVICE_DOWNLOAD_APPEND_NODE, mu.SERVICE_DOWNLOAD_DELETE_NODE},
+		ReadBufferSize:  10,
+		WriteBufferSize: 10,
+	})
+	go gc4Alldownloader() //comm
+
+	MsclientBid, _ = mu.NewClient(&mu.ClientConfig{ClientName: namebid,
+		MsgServerAddr:   serveraddrbid,
+		EventHandler:    processeventbid,
+		CanHandleEvents: []int{mu.SERVICE_DOWNLOAD_APPEND_NODE, mu.SERVICE_DOWNLOAD_DELETE_NODE},
+		ReadBufferSize:  10,
+		WriteBufferSize: 10,
+	})
+	go gc4AlldownloaderBid() //bid
+}
+
+//爬虫上下架
+func UpdateSpiderByCodeState(code, state string, event int) (bool, error) {
+	msgid := mu.UUID(8)
+	data := map[string]interface{}{
+		"code":  code,
+		"state": state,
+	}
+	rep := map[string]interface{}{}
+	var bs []byte
+	var err error
+	if UploadEvents[event] == "bid" { //bid:803
+		bs, err = MsclientBid.Call("", msgid, event, mu.SENDTO_TYPE_ALL_RECIVER, data, 60)
+	} else { //comm:801
+		bs, err = Msclient.Call("", msgid, event, mu.SENDTO_TYPE_ALL_RECIVER, data, 60)
+	}
+	if err != nil {
+		return false, err
+	} else {
+		json.Unmarshal(bs, &rep)
+		b, _ := rep["b"].(bool)
+		if !b {
+			err = errors.New(qu.ObjToString(rep["err"]))
+		}
+		return b, err
+	}
+}

+ 1 - 7
src/util.go → src/util/util.go

@@ -1,16 +1,10 @@
-package main
+package util
 
 import (
 	qu "qfw/util"
 	"time"
 )
 
-//初始化时间
-func InitTime() {
-	defer qu.Catch()
-	StartTime, EndTime = GetWorkDayTimeUnix()
-}
-
 //获取第day天凌晨的时间戳
 func GetTime(day int) int64 {
 	defer qu.Catch()

+ 8 - 9
src/work.go → src/util/work.go

@@ -1,4 +1,4 @@
-package main
+package util
 
 import (
 	qu "qfw/util"
@@ -6,20 +6,19 @@ import (
 	"time"
 )
 
-var workfig map[string]map[string]string
+var Workfig map[string]map[string]string
 var morning_on, morning_off, afternoon_on, afternoon_off string
 
-func init() {
-	qu.ReadConfig("./worktime.json", &workfig)
-	morning_on = workfig["morning"]["on"]
-	morning_off = workfig["morning"]["off"]
-	afternoon_on = workfig["afternoon"]["on"]
-	afternoon_off = workfig["afternoon"]["off"]
+func InitWorkfig() {
+	morning_on = Workfig["morning"]["on"]
+	morning_off = Workfig["morning"]["off"]
+	afternoon_on = Workfig["afternoon"]["on"]
+	afternoon_off = Workfig["afternoon"]["off"]
 }
 
 //获取完成时间
 func CompleteTime(urgent string) int64 {
-	duration := workfig["urgency"][urgent]
+	duration := Workfig["urgency"][urgent]
 	do := int64(0)
 	if strings.Contains(duration, "h") { //单位为小时,需要计算
 		do = qu.Int64All(strings.Replace(duration, "h", "", -1)) * int64(3600)

+ 3 - 3
src/worktime.json

@@ -8,9 +8,9 @@
         "off": "17:40"
     },
     "urgency": {
-        "4": "2h",
-        "3": "6h",
-        "2": "16h",
+        "4": "8h",
+        "3": "16h",
+        "2": "24h",
         "1": "40h"
     }
 }

Some files were not shown because too many files changed in this diff