
Fourth-version crawler task update

maxiaoshan · 1 year ago
commit 2e1dc5187f

+ 5 - 0
src/config.json

@@ -149,6 +149,11 @@
 	"newstartaskcron": "0 0 7 ? * *",
 	"closenum": 2,
 	"daynum": 6,
+	"infoformat": {
+		"1": true,
+		"2": true,
+		"3": true
+	},
 	"mail": {
 		"host": "smtp.exmail.qq.com",
 		"port": 465,

+ 2 - 0
src/logs/task.log

@@ -1765,3 +1765,5 @@
 2023/06/01 11:14:47 newtask.go:207: info  28000
 2023/06/01 11:14:47 newtask.go:207: info  29000
 2023/06/01 11:14:47 newtask.go:207: info  30000
+2023/09/28 15:04:09 newtask.go:97: info  1695744000 1695830400 2023-09-27
+2023/09/28 15:04:10 newtask.go:186: info  0
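
The two integers in the new log line are the start and end unix timestamps of the statistics window, followed by the publish-date string; they decode to local midnight of 2023-09-27 and 2023-09-28 (UTC+8 is assumed here, inferred from the values themselves). A quick check:

package main

import (
	"fmt"
	"time"
)

func main() {
	cst := time.FixedZone("CST", 8*3600)
	fmt.Println(time.Unix(1695744000, 0).In(cst)) // 2023-09-27 00:00:00 +0800 CST
	fmt.Println(time.Unix(1695830400, 0).In(cst)) // 2023-09-28 00:00:00 +0800 CST
}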

+ 165 - 56
src/luatask/newtask.go

@@ -10,12 +10,13 @@ import (
 	"util"
 )
 
-const NEWTASK_LISTERR, NEWTASK_DATAINFOERR, NEWTASK_RATEERR, NEWTASK_DOWNLOADERR, NEWTASK_DATAINFOWARN = "1", "2", "4", "5", "6"
+const NEWTASK_LISTERR, NEWTASK_DATAINFOERR, NEWTASK_PAGEFLIPERR, NEWTASK_RATEERR, NEWTASK_DOWNLOADERR, NEWTASK_DATAINFOWARN = "1", "2", "3", "4", "5", "6"
 
 var NewCodeInfoMap = map[string]*NewSpider{}
 var LuaErrTypeInfo = map[string]string{
 	NEWTASK_LISTERR:      "列表页异常",
 	NEWTASK_DATAINFOERR:  "数据异常错误",
+	NEWTASK_PAGEFLIPERR:  "爬虫翻页异常",
 	NEWTASK_RATEERR:      "采集频率异常",
 	NEWTASK_DOWNLOADERR:  "下载异常",
 	NEWTASK_DATAINFOWARN: "数据异常警告",
@@ -44,6 +45,7 @@ type NewSpider struct {
 	Channel      string                 `bson:"channel"`
 	Platform     string                 `bson:"platform"`
 	Event        int                    `bson:"event"`
+	InfoFormat   int                    `bson:"infoformat"`
 	PendState    int                    `bson:"pendstate"`
 	ModifyUser   string                 `bson:"modifyuser"`
 	ModifyId     string                 `bson:"modifyuserid"`
@@ -52,6 +54,11 @@ type NewSpider struct {
 	Working      int                    `bson:"working"`
 	AuditTime    int64                  `bson:"l_uploadtime"`
 	ListIsFilter bool                   `bson:"listisfilter"`
+	UpLimit      int                    `bson:"uplimit"`
+	MaxPage      int                    `bson:"maxpage"`
+	Page_FlipOk  bool                   `bson:"page_flipok"`
+	Page_OneOk   bool                   `bson:"page_oneok"`
+	Page_TwoOk   bool                   `bson:"page_twook"`
 	CodeTags     map[string]interface{} `bson:"codetags"`
 	//统计信息
 	Detail_DownloadNum        int               `bson:"detail_downloadnum"`
@@ -73,7 +80,7 @@ type NewSpider struct {
 	Comeintime int64 `bson:"comeintime"`
 	//异常汇总
 	//Error          map[string]*ErrorInfo `json:"error"`
-	ErrType        int          `bson:"errtype"`        //记录权重最高的异常类型
+	ErrType        string       `bson:"errtype"`        //记录权重最高的异常类型
 	ErrTypeMap     map[int]bool `bson:"errtypemap"`     //记录所有异常
 	ErrDescription string       `bson:"errdescription"` //异常描述
 }
@@ -115,7 +122,9 @@ func getCodeBaseInfo() {
 			},
 			//lua正在被维护的爬虫和上架爬虫
 			map[string]interface{}{
-				"platform": "golua平台",
+				"platform": map[string]interface{}{
+					"$in": []string{"golua平台", "chrome"},
+				},
 				"state": map[string]interface{}{
 					"$in": []int{0, 1, 2}, //待完成、待审核、未通过
 				},
@@ -138,6 +147,8 @@ func getCodeBaseInfo() {
 		"l_uploadtime": 1,
 		"listisfilter": 1,
 		"codetags":     1,
+		"infoformat":   1,
+		"param_common": 1,
 	}
 	it := sess.DB(util.MgoEB.DbName).C("luaconfig").Find(&query).Select(&fields).Iter()
 	n := 0
@@ -152,9 +163,15 @@ func getCodeBaseInfo() {
 			sp := &NewSpider{
 				WarnInfoMap: map[int]*WarnInfo{},
 				//Error:       map[string]*ErrorInfo{},
-				ErrType:    -1,
-				ErrTypeMap: map[int]bool{},
+				ErrType:     "-1",
+				ErrTypeMap:  map[int]bool{},
+				Page_FlipOk: true,
+				Page_OneOk:  true,
+				Page_TwoOk:  true,
 			}
+			param_common := tmp["param_common"].([]interface{})
+			maxPage := qu.IntAll(param_common[5])
+			delete(tmp, "param_common")
 			luaByte, _ := bson.Marshal(tmp)
 			if bson.Unmarshal(luaByte, &sp) != nil {
 				qu.Info("初始化爬虫失败:", tmp["_id"])
@@ -162,6 +179,7 @@ func getCodeBaseInfo() {
 			}
 			sp.Working = util.CodeEventWorking[sp.Working]
 			sp.Model = util.CodeEventModel[sp.Event]
+			sp.MaxPage = maxPage
 			if sp.Platform == "python" {
 				sp.Model = 1
 			}
@@ -552,10 +570,13 @@ func getSpiderDownloadRateData() {
 		//},
 	}
 	fields := map[string]interface{}{
-		"spidercode": 1,
-		"alltimes":   1,
-		"zero":       1,
-		"oh_percent": 1,
+		"spidercode":   1,
+		"alltimes":     1,
+		"zero":         1,
+		"oh_percent":   1,
+		"uplimit":      1,
+		"page_fail":    1,
+		"page_onefail": 1,
 	}
 	it := sess.DB(util.MgoS.DbName).C("spider_downloadrate").Find(&query).Select(&fields).Iter()
 	n := 0
@@ -571,11 +592,18 @@ func getSpiderDownloadRateData() {
 			alltimes := qu.IntAll(tmp["alltimes"])
 			zero := qu.IntAll(tmp["zero"])
 			oh_percent := qu.IntAll(tmp["oh_percent"])
+			uplimit := qu.IntAll(tmp["uplimit"])
+			page_fail := qu.IntAll(tmp["page_fail"])
+			page_onefail := qu.IntAll(tmp["page_onefail"])
 			lock.Lock()
 			if sp := NewCodeInfoMap[code]; sp != nil {
 				sp.List_NoDataTimes = zero
 				sp.List_RunTimes = alltimes
 				sp.List_AllInTimes = oh_percent
+				sp.Page_FlipOk = !(uplimit > 0)
+				sp.UpLimit = uplimit
+				sp.Page_OneOk = !(page_onefail == alltimes && page_onefail > 0)
+				sp.Page_TwoOk = !(page_fail == alltimes && page_fail > 0)
 			}
 			lock.Unlock()
 		}(tmp)
@@ -626,7 +654,7 @@ func saveCodeInfo() {
 				codeInfoArr = []map[string]interface{}{}
 			}
 			if len(taskArr) > 500 {
-				util.MgoEB.UpSertBulk("newtask", taskArr...)
+				util.MgoEB.UpSertBulk("task", taskArr...)
 				taskArr = [][]map[string]interface{}{}
 			}
 		}(spider)
@@ -637,7 +665,7 @@ func saveCodeInfo() {
 		codeInfoArr = []map[string]interface{}{}
 	}
 	if len(taskArr) > 0 {
-		util.MgoEB.UpSertBulk("newtask", taskArr...)
+		util.MgoEB.UpSertBulk("task", taskArr...)
 		taskArr = [][]map[string]interface{}{}
 	}
 	NewCodeInfoMap = map[string]*NewSpider{}
@@ -649,7 +677,10 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
 	if sp.Event == 7000 {
 		return
 	}
-	if sp.ErrType == -1 { //无异常
+	if sp.ErrType == "-1" { //无异常
+		return
+	}
+	if !util.CreateTaskInfoFormat[sp.InfoFormat] { //非创建任务爬虫
 		return
 	}
 	//查询历史任务
@@ -660,12 +691,13 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
 		},
 	}
 	fields := map[string]interface{}{
-		"i_state":    1,
-		"s_type":     1,
-		"s_descript": 1,
-		"i_times":    1,
+		"i_state":      1,
+		"s_type":       1,
+		"s_descript":   1,
+		"i_times":      1,
+		"l_comeintime": 1,
 	}
-	list, _ := util.MgoEB.Find("newtask", query, nil, fields, false, -1, -1)
+	list, _ := util.MgoEB.Find("task", query, nil, fields, false, -1, -1)
 	update := []map[string]interface{}{}
 	if list != nil && len(*list) > 0 { //已有任务
 		if len(*list) > 1 {
@@ -677,11 +709,12 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
 			})
 			return
 		}
-		task := (*list)[0]                                 //唯一任务
-		state_old := qu.IntAll(task["i_state"])            //历史任务状态
-		times_old := qu.IntAll(task["i_times"])            //历史任务次数
-		type_old := qu.ObjToString(task["s_type"])         //历史任务异常类型
-		descript_old := qu.ObjToString(task["s_descript"]) //历史任务描述
+		task := (*list)[0]                                  //唯一任务
+		state_old := qu.IntAll(task["i_state"])             //历史任务状态
+		times_old := qu.IntAll(task["i_times"])             //历史任务次数
+		type_old := qu.ObjToString(task["s_type"])          //历史任务异常类型
+		descript_old := qu.ObjToString(task["s_descript"])  //历史任务描述
+		comeintime_old := qu.Int64All(task["l_comeintime"]) //历史任务创建时间
 
 		result := map[string]interface{}{
 			"i_event":      sp.Event,
@@ -691,11 +724,19 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
 		}
 		//任务状态
 		if state_old == 0 {
-			result["i_state"] = 1 //第二次任务,将历史待确认任务升级为待处理
+			if sp.ErrType == NEWTASK_LISTERR || sp.ErrType == NEWTASK_DATAINFOERR {
+				result["i_state"] = 1
+			} else if comeintime_old >= util.GetTime(-30) { //在一个月内有历史任务
+				result["i_state"] = 1
+			} else {
+				result["l_complete"] = util.CompleteTime("1")
+				result["l_comeintime"] = time.Now().Unix()
+				result["l_updatetime"] = time.Now().Unix()
+			}
 		}
 		//任务类型
-		if sp.ErrType < qu.IntAll(type_old) { //取优先级高者
-			result["s_type"] = fmt.Sprint(sp.ErrType)
+		if sp.ErrType < type_old { //取优先级高者
+			result["s_type"] = sp.ErrType
 		}
 		update = append(update, map[string]interface{}{"_id": task["_id"]})
 		update = append(update, map[string]interface{}{"$set": result})
@@ -704,10 +745,10 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
 		lock.Unlock()
 	} else { //无历史任务
 		state_new := 0
-		if sp.ErrType == 1 && sp.Channel_Status != 200 { //列表页异常任务,栏目响应状态异常者,直接建待处理任务
-			state_new = 1
-		}
-		if sp.ErrType == 2 { //数据异常错误类型,任务状态1
+		//if sp.ErrType == 1 && sp.Channel_Status != 200 { //列表页异常任务,栏目响应状态异常者,直接建待处理任务
+		//	state_new = 1
+		//}
+		if sp.ErrType == NEWTASK_LISTERR || sp.ErrType == NEWTASK_DATAINFOERR {
 			state_new = 1
 		}
 		saveMap := map[string]interface{}{
@@ -719,7 +760,7 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
 			"i_event":      sp.Event,
 			"i_state":      state_new,
 			"s_source":     "程序",
-			"s_type":       fmt.Sprint(sp.ErrType),
+			"s_type":       sp.ErrType,
 			"s_descript":   sp.ErrDescription,
 			"i_times":      1,
 			"l_comeintime": time.Now().Unix(),
@@ -738,6 +779,7 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
 func getAllErr(sp *NewSpider) {
 	listErr(sp)           //列表页异常
 	dataInfoErr(sp)       //数据异常错误
+	pageFlipErr(sp)       //爬虫翻页异常
 	downloadRateErr(sp)   //下载频率异常
 	downloadFailedErr(sp) //下载异常
 	dataInfoWarn(sp)      //数据异常警告
@@ -764,7 +806,7 @@ func listErr(sp *NewSpider) {
 			//sp.Error[NEWTASK_LISTERR] = &ErrorInfo{
 			//	ErrInfo: map[string]bool{LuaErrTypeInfo[NEWTASK_LISTERR]: true},
 			//}
-			sp.ErrType = qu.IntAll(NEWTASK_LISTERR)
+			sp.ErrType = NEWTASK_LISTERR
 			sp.ErrTypeMap[qu.IntAll(NEWTASK_LISTERR)] = true
 			heartTime := ""
 			if sp.HeartTime != 0 {
@@ -806,23 +848,65 @@ func dataInfoErr(sp *NewSpider) {
 			//}
 			sp.ErrDescription += "数据异常错误:\n" + resultDescription
 			sp.ErrTypeMap[qu.IntAll(NEWTASK_DATAINFOERR)] = true
-			if sp.ErrType < 0 {
-				sp.ErrType = qu.IntAll(NEWTASK_DATAINFOERR)
+			if sp.ErrType < "0" {
+				sp.ErrType = NEWTASK_DATAINFOERR
 			}
 		}
 	}
 
 }
-func downloadRateErr(sp *NewSpider) {
+
+func pageFlipErr(sp *NewSpider) {
 	defer qu.Catch()
-	if sp.Platform == "python" && !sp.Py_IsValid {
+	if sp.Platform == "python" {
 		return
 	}
-	if sp.List_AllInTimes > 0 {
-		errFlag := false
-		if sp.Model == 1 && sp.AuditTime > 24 { //分开采集,且爬虫审核时间超过24小时,记录异常
+	errFlag := false
+	if sp.CodeTags != nil {
+		tagTime, _ := sp.CodeTags[NEWTASK_PAGEFLIPERR].(int64)
+		if tagTime == 0 { //无翻页异常标记
+			errFlag = true
+		} else if tagTime > 0 && tagTime <= util.GetTime(-7) { //标记失效
 			errFlag = true
-		} else if sp.Event != 7410 { //顺序采集(7410节点不建采集频率异常任务)
+		}
+	} else { //无标记,记录翻页异常
+		errFlag = true
+	}
+	if errFlag {
+		//1、无限翻页爬虫列表页采集时超过最大限制页,高性能100页,队列50页
+		if !sp.Page_FlipOk && sp.Model == 1 {
+			sp.ErrTypeMap[qu.IntAll(NEWTASK_PAGEFLIPERR)] = true
+			sp.ErrDescription += "爬虫翻页异常:\n	列表页共采集" + fmt.Sprint(sp.List_RunTimes) + "轮,其中有" + fmt.Sprint(sp.UpLimit) + "轮列表页采集翻页超过最大限制\n"
+			if sp.ErrType < "0" {
+				sp.ErrType = NEWTASK_PAGEFLIPERR
+			}
+		}
+		//2、爬虫列表页采集第一页无数据,第二页有数据
+		if !sp.Page_OneOk {
+			sp.ErrTypeMap[qu.IntAll(NEWTASK_PAGEFLIPERR)] = true
+			sp.ErrDescription += "爬虫翻页异常:\n	列表页共采集" + fmt.Sprint(sp.List_RunTimes) + "轮,其中有" + fmt.Sprint(sp.List_RunTimes) + "轮爬虫未采集到第一页数据\n"
+			if sp.ErrType < "0" {
+				sp.ErrType = NEWTASK_PAGEFLIPERR
+			}
+		}
+		//3、爬虫列表页采集第一页有数据,第二页无数据或第二页数据与第一页数据相同
+		if !sp.Page_TwoOk {
+			sp.ErrTypeMap[qu.IntAll(NEWTASK_PAGEFLIPERR)] = true
+			sp.ErrDescription += "爬虫翻页异常:\n	列表页共采集" + fmt.Sprint(sp.List_RunTimes) + "轮,其中有" + fmt.Sprint(sp.List_RunTimes) + "轮爬虫采集的第一、二页数据相同或未采集到第二页数据\n"
+			if sp.ErrType < "0" {
+				sp.ErrType = NEWTASK_PAGEFLIPERR
+			}
+		}
+	}
+}
+
+func downloadRateErr(sp *NewSpider) {
+	defer qu.Catch()
+	if sp.Platform == "python" {
+		if !sp.Py_IsValid { //无效爬虫
+			return
+		} else {
+			errFlag := false
 			if sp.CodeTags != nil {
 				tagTime, _ := sp.CodeTags[NEWTASK_RATEERR].(int64)
 				if tagTime == 0 { //无频率异常标记
@@ -830,18 +914,43 @@ func downloadRateErr(sp *NewSpider) {
 				} else if tagTime > 0 && tagTime <= util.GetTime(-7) { //标记失效
 					errFlag = true
 				}
-			} else { //无标记,记录列表页异常
+			} else { //无标记,记录采集频率异常
 				errFlag = true
 			}
+			if errFlag && sp.List_AllInTimes > 0 && sp.AuditTime > 24 {
+				sp.ErrTypeMap[qu.IntAll(NEWTASK_RATEERR)] = true
+				sp.ErrDescription += "采集频率异常:\n	列表页共采集" + fmt.Sprint(sp.List_RunTimes) + "轮,其中有" + fmt.Sprint(sp.List_AllInTimes) + "轮数据全采\n"
+				if sp.ErrType < "0" {
+					sp.ErrType = NEWTASK_RATEERR
+				}
+			}
 		}
-		if errFlag {
-			//sp.Error[NEWTASK_RATEERR] = &ErrorInfo{
-			//	ErrInfo: map[string]bool{LuaErrTypeInfo[NEWTASK_RATEERR]: true},
-			//}
-			sp.ErrTypeMap[qu.IntAll(NEWTASK_RATEERR)] = true
-			sp.ErrDescription += "采集频率异常:\n	列表页共采集" + fmt.Sprint(sp.List_RunTimes) + "轮,其中有" + fmt.Sprint(sp.List_AllInTimes) + "轮数据全采\n"
-			if sp.ErrType < 0 {
-				sp.ErrType = qu.IntAll(NEWTASK_RATEERR)
+	} else { //lua
+		if sp.List_AllInTimes > 0 {
+			errFlag := false
+			if sp.Model == 1 && sp.AuditTime > 24 && (sp.MaxPage == 1 || sp.MaxPage > 100) { //分开采集,且爬虫审核时间超过24小时,记录异常
+				errFlag = true
+			} else if sp.Event != 7410 { //顺序采集(7410节点不建采集频率异常任务)
+				if sp.CodeTags != nil {
+					tagTime, _ := sp.CodeTags[NEWTASK_RATEERR].(int64)
+					if tagTime == 0 { //无频率异常标记
+						errFlag = true
+					} else if tagTime > 0 && tagTime <= util.GetTime(-7) { //标记失效
+						errFlag = true
+					}
+				} else { //无标记,记录采集频率异常
+					errFlag = true
+				}
+			}
+			if errFlag {
+				//sp.Error[NEWTASK_RATEERR] = &ErrorInfo{
+				//	ErrInfo: map[string]bool{LuaErrTypeInfo[NEWTASK_RATEERR]: true},
+				//}
+				sp.ErrTypeMap[qu.IntAll(NEWTASK_RATEERR)] = true
+				sp.ErrDescription += "采集频率异常:\n	列表页共采集" + fmt.Sprint(sp.List_RunTimes) + "轮,其中有" + fmt.Sprint(sp.List_AllInTimes) + "轮数据全采\n"
+				if sp.ErrType < "0" {
+					sp.ErrType = NEWTASK_RATEERR
+				}
 			}
 		}
 	}
@@ -925,8 +1034,8 @@ func downloadFailedErr(sp *NewSpider) {
 				}
 				sp.getErrHrefs("spider_highlistdata", NEWTASK_DOWNLOADERR, q)
 				sp.ErrTypeMap[qu.IntAll(NEWTASK_DOWNLOADERR)] = true
-				if sp.ErrType < 0 {
-					sp.ErrType = qu.IntAll(NEWTASK_DOWNLOADERR)
+				if sp.ErrType < "0" {
+					sp.ErrType = NEWTASK_DOWNLOADERR
 				}
 			}
 		} else { //顺序采集
@@ -943,8 +1052,8 @@ func downloadFailedErr(sp *NewSpider) {
 			count := sp.getErrHrefs("spider_listdata", NEWTASK_DOWNLOADERR, q)
 			if count > 0 {
 				sp.ErrTypeMap[qu.IntAll(NEWTASK_DOWNLOADERR)] = true
-				if sp.ErrType < 0 {
-					sp.ErrType = qu.IntAll(NEWTASK_DOWNLOADERR)
+				if sp.ErrType < "0" {
+					sp.ErrType = NEWTASK_DOWNLOADERR
 				}
 			}
 		}
@@ -980,8 +1089,8 @@ func dataInfoWarn(sp *NewSpider) {
 				//}
 				sp.ErrDescription += "数据异常警告:\n" + resultDescription
 				sp.ErrTypeMap[qu.IntAll(NEWTASK_DATAINFOWARN)] = true
-				if sp.ErrType < 0 {
-					sp.ErrType = qu.IntAll(NEWTASK_DATAINFOWARN)
+				if sp.ErrType < "0" {
+					sp.ErrType = NEWTASK_DATAINFOWARN
 				}
 			}
 		}
@@ -999,7 +1108,7 @@ func (sp *NewSpider) getErrHrefs(coll, errType string, query map[string]interfac
 		return
 	}
 	sp.ErrDescription += LuaErrTypeInfo[NEWTASK_DOWNLOADERR] + ":共下载" + fmt.Sprint(sp.Detail_DownloadNum) + "条,失败" + fmt.Sprint(sp.Detail_DownloadFailNum) + "条\n"
-	if sp.Platform != "golua平台" {
+	if sp.Platform != "golua平台" && sp.Platform != "chrome" {
 		return
 	}
 	list, _ := util.MgoS.Find(coll, query, nil, `{"href":1}`, false, 0, 3)
@@ -1044,7 +1153,7 @@ func closeTask() {
 			"l_closetime": time.Now().Unix(),
 		},
 	}
-	util.MgoEB.Update("newtask", query, set, false, true)
+	util.MgoEB.Update("task", query, set, false, true)
 }
 
 /*
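
A note on the ErrType change in this file: switching the field from int to string lets the priority checks keep using `<`, because Go compares strings lexicographically and the single-character codes "1"…"7" order the same way as their numeric values, while the "-1" sentinel still sorts below "0" ('-' precedes '0' in ASCII). A minimal sketch of the property and its limit:

package main

import "fmt"

func main() {
	// "-1" is the "no error yet" sentinel; "1".."6" are the NEWTASK_* codes.
	fmt.Println("-1" < "0")           // true, so `sp.ErrType < "0"` still means "no error recorded"
	fmt.Println("1" < "2", "2" < "3") // true true: lexicographic == numeric for single digits
	fmt.Println("1" < "6", "6" < "7") // true true
	// Caveat (assumption): this only holds while every code stays a single character;
	// a two-digit code such as "10" would sort before "2".
	fmt.Println("10" < "2") // true
}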

+ 6 - 3
src/luatask/othertask.go

@@ -11,8 +11,10 @@ func CreateTaskByCodePendstate() {
 	defer qu.Catch()
 	today := time.Now()
 	query := map[string]interface{}{
-		"state":     5,
-		"platform":  "golua平台",
+		"state": 5,
+		"platform": map[string]interface{}{
+			"$in": []string{"golua平台", "chrome"},
+		},
 		"pendstate": 1,
 		"pendtime": map[string]interface{}{
 			"$lt": util.GetTime(-30), //爬虫挂起超过30天的,创建任务
@@ -26,6 +28,7 @@ func CreateTaskByCodePendstate() {
 		"modifyuserid": 1,
 		"event":        1,
 		"pendtime":     1,
+		"platform":     1,
 	}
 	list, _ := util.MgoEB.Find("luaconfig", query, nil, fields, false, -1, -1)
 	for _, tmp := range *list {
@@ -54,7 +57,7 @@ func CreateTaskByCodePendstate() {
 			set["s_descript"] = descript + qu.FormatDate(&today, qu.Date_Short_Layout) + "追加描述:------------------------------\n该爬虫已连续挂起超30天(" + pendtimeStr + ")\n"
 		} else { //无历史任务
 			set["s_descript"] = "该爬虫已连续挂起超30天(" + pendtimeStr + ")\n"
-			set["s_platform"] = "golua平台"
+			set["s_platform"] = tmp["platform"]
 			set["s_channel"] = tmp["channel"]
 			set["i_event"] = tmp["event"]
 			set["s_type"] = "7"

+ 0 - 63
src/luatask/sitecount.go

@@ -40,32 +40,6 @@ var SiteInfoModel = `{
     }
 }`
 
-var LuaListDownloadAllNum int64
-var LuaListDownloadSuccessAllNum int64
-var LuaBiddingDownloadAllNum int64
-var PythonListDownloadAllNum int64
-var PythonListDownloadSuccessAllNum int64
-var PythonBiddingDownloadAllNum int64
-
-var LuaPythonNumModel = `{
-    "msgtype": "text",
-    "text": {
-		"content": "%s"
-	}
-}`
-var MarkdownModel = `{
-    "msgtype": "markdown",
-    "markdown": {
-        "content": "%s"
-    }
-}`
-var NumContentModel = `
-     >平台:<font color=\"warning\">%s</font>
-     >列表页采集量:<font color=\"warning\">%d</font>
-     >列表页采集成功量:<font color=\"warning\">%d</font>\n
-     >Bidding成功量:<font color=\"warning\">%d</font>\n
-`
-
 //var AllHref map[string]string
 
 //重点网站每日采集量统计
@@ -510,43 +484,6 @@ func SendSiteInfoToWxWork(file *xlsx.File) {
 	defer resp1.Body.Close()
 }
 
-func SendLuaPythonAllNum() {
-	defer qu.Catch()
-	luaContent := fmt.Sprintf(NumContentModel, "Lua", LuaListDownloadAllNum, LuaListDownloadSuccessAllNum, LuaBiddingDownloadAllNum)
-	pythonContent := fmt.Sprintf(NumContentModel, "python", PythonListDownloadAllNum, PythonListDownloadSuccessAllNum, PythonBiddingDownloadAllNum)
-	resultContent := fmt.Sprintf(MarkdownModel, Publishtime+",Lua、Python各维度采集量统计结果如下:\n"+luaContent+pythonContent)
-	qu.Debug(resultContent)
-	//保存记录
-	util.MgoS.Save("spider_luapythoncount", map[string]interface{}{
-		"lualistnum":           LuaListDownloadAllNum,
-		"lualistsuccessnum":    LuaListDownloadSuccessAllNum,
-		"luabiddingnum":        LuaBiddingDownloadAllNum,
-		"pythonlistnum":        PythonListDownloadAllNum,
-		"pythonlistsuccessnum": PythonListDownloadSuccessAllNum,
-		"pythonbiddingnum":     PythonBiddingDownloadAllNum,
-		"comeintime":           time.Now().Unix(),
-		"date":                 Publishtime,
-	})
-	//重置
-	LuaListDownloadAllNum = 0
-	LuaListDownloadSuccessAllNum = 0
-	LuaBiddingDownloadAllNum = 0
-	PythonListDownloadAllNum = 0
-	PythonListDownloadSuccessAllNum = 0
-	PythonBiddingDownloadAllNum = 0
-	//发送统计
-	resp, err := http.Post(
-		"https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=97850772-88d0-4544-a2c3-6201aeddff9e",
-		"application/json",
-		bytes.NewBuffer([]byte(resultContent)),
-	)
-	if err != nil {
-		fmt.Println("request error:", err)
-		return
-	}
-	defer resp.Body.Close()
-}
-
 //func GetHighListDataNum(ctime, etime int64, ptime string, siteInfoMap map[string]*SiteInfo) {
 //	defer qu.Catch()
 //	sess := util.MgoS.GetMgoConn()

+ 26 - 81
src/luatask/task.go

@@ -7,8 +7,8 @@ import (
 	"regexp"
 	"strings"
 	"sync"
-	"sync/atomic"
 	"time"
+	"timetask"
 	"util"
 
 	"github.com/donnie4w/go-logger/logger"
@@ -184,7 +184,6 @@ func StartTask() {
 	InitInfo() //初始化时间
 	logger.Debug(StartTime, EndTime, Publishtime)
 	GetCodeBaseInfo() //初始化爬虫基本信息
-	GetBiddingCount() //统计bidding表爬虫采集量
 	//GetBiddingFileData()           //统计bidding_file附件大小为3.7 KB的信息
 	GetCodeHeart()                 //初始化爬虫心跳信息
 	GetSpiderHighListDownloadNum() //统计spider_highlistdata爬虫列表页下载量、下载失败量、未下载量
@@ -198,8 +197,8 @@ func StartTask() {
 	// GetDownloadNumber() //统计下载量
 	//CloseTask()      //关闭任务
 	SendInfoToWxWork_SiteDataCount()
-	SendLuaPythonAllNum()
-	CreateTaskByCodePendstate() //挂机爬虫任务
+	CreateTaskByCodePendstate()          //挂机爬虫任务
+	timetask.CountLuaPythonNumEveryDay() //每日采集量统计
 }
 
 //初始化
@@ -237,7 +236,9 @@ func GetCodeBaseInfo() {
 			},
 			//lua正在被维护的爬虫和上架爬虫
 			map[string]interface{}{
-				"platform": "golua平台",
+				"platform": map[string]interface{}{
+					"$in": []string{"golua平台", "chrome"},
+				},
 				"state": map[string]interface{}{
 					"$in": []int{0, 1, 2}, //待完成、待审核、未通过
 				},
@@ -324,54 +325,6 @@ func GetCodeBaseInfo() {
 	logger.Debug("爬虫基本信息准备完成...", len(CodeInfoMap))
 }
 
-func GetBiddingCount() {
-	defer qu.Catch()
-	sess := util.MgoB.GetMgoConn()
-	defer util.MgoB.DestoryMongoConn(sess)
-	lock := &sync.Mutex{}
-	wg := &sync.WaitGroup{}
-	ch := make(chan bool, 5)
-	query := map[string]interface{}{
-		"comeintime": map[string]interface{}{
-			"$gte": StartTime,
-			"$lt":  EndTime,
-		},
-	}
-	fieles := map[string]interface{}{
-		"spidercode": 1,
-	}
-	count := util.MgoB.Count("bidding", query)
-	logger.Debug("bidding采集数据量:", count)
-	it := sess.DB(util.MgoB.DbName).C("bidding").Find(&query).Select(&fieles).Iter()
-	n := 0
-	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
-		wg.Add(1)
-		ch <- true
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-ch
-				wg.Done()
-			}()
-			code := qu.ObjToString(tmp["spidercode"])
-			lock.Lock()
-			if sp := CodeInfoMap[code]; sp != nil {
-				if sp.Platform == "golua平台" {
-					LuaBiddingDownloadAllNum++
-				} else if sp.Platform == "python" {
-					PythonBiddingDownloadAllNum++
-				}
-			}
-			lock.Unlock()
-		}(tmp)
-		if n%1000 == 0 {
-			logger.Debug(n)
-		}
-		tmp = map[string]interface{}{}
-	}
-	wg.Wait()
-	logger.Debug("Bidding数据量统计完成...", LuaBiddingDownloadAllNum, PythonBiddingDownloadAllNum)
-}
-
 // GetCodeHeart 获取爬虫的心跳信息
 func GetCodeHeart() {
 	defer qu.Catch()
@@ -2165,16 +2118,6 @@ func CreateTaskProcess() {
 			if spider.Platform == "golua平台" {
 				//根据爬虫信息新建任务(python任务无人处理,暂停)
 				CreateTask(task, spider, &upsertBulk, lock) //比对历史任务,新建任务
-
-				//列表页总下载量
-				atomic.AddInt64(&LuaListDownloadAllNum, int64(spider.RepeatDownloadAllNum))
-				//列表页总下载成功量
-				atomic.AddInt64(&LuaListDownloadSuccessAllNum, int64(spider.RepeatDownloadSuccessNum))
-			} else {
-				//列表页总下载量
-				atomic.AddInt64(&PythonListDownloadAllNum, int64(spider.RepeatDownloadAllNum))
-				//列表页总下载成功量
-				atomic.AddInt64(&PythonListDownloadSuccessAllNum, int64(spider.RepeatDownloadSuccessNum))
 			}
 
 			lock.Lock()
@@ -2183,7 +2126,7 @@ func CreateTaskProcess() {
 				arr = []map[string]interface{}{}
 			}
 			if len(upsertBulk) > 500 {
-				util.MgoEB.UpSertBulk("task", upsertBulk...)
+				util.MgoEB.UpSertBulk("task_back", upsertBulk...)
 				upsertBulk = [][]map[string]interface{}{}
 			}
 			lock.Unlock()
@@ -2196,7 +2139,7 @@ func CreateTaskProcess() {
 		arr = []map[string]interface{}{}
 	}
 	if len(upsertBulk) > 0 {
-		util.MgoEB.UpSertBulk("task", upsertBulk...)
+		util.MgoEB.UpSertBulk("task_back", upsertBulk...)
 		upsertBulk = [][]map[string]interface{}{}
 	}
 	lock.Unlock()
@@ -2252,7 +2195,7 @@ func CreateTask(t *Task, sp *Spider, upsertBulk *[][]map[string]interface{}, loc
 		"i_times":    1,
 		"s_urgency":  1,
 	}
-	list, _ := util.MgoEB.Find("task", query, nil, fields, false, -1, -1)
+	list, _ := util.MgoEB.Find("task_back", query, nil, fields, false, -1, -1)
 	update := []map[string]interface{}{}
 	logger.Info("创建任务爬虫:", sp.Code)
 	if list != nil && len(*list) > 0 { //已有任务
@@ -2448,8 +2391,10 @@ func ResetHistoryDataState() {
 			"$ne": 0,
 		},
 	}
-	pTimeStartInt := time.Now().Unix() - int64(util.DayNum*86400)
-	pTimeStartStr := qu.FormatDateByInt64(&pTimeStartInt, qu.Date_Short_Layout)
+	pTimeStartInt1 := time.Now().Unix() - int64(util.DayNum*86400)
+	pTimeStartInt2 := time.Now().AddDate(0, 0, 2).Unix()
+	pTimeStartStr1 := qu.FormatDateByInt64(&pTimeStartInt1, qu.Date_Short_Layout)
+	pTimeStartStr2 := qu.FormatDateByInt64(&pTimeStartInt2, qu.Date_Short_Layout)
 	it := sess.DB("spider").C("spider_historydata").Find(&query).Iter()
 	count, _ := sess.DB("spider").C("spider_historydata").Find(&query).Count()
 	logger.Info("更新数据状态数量:", count)
@@ -2474,7 +2419,7 @@ func ResetHistoryDataState() {
 				save = append(save, tmp)
 				update = append(update, map[string]interface{}{"$set": map[string]interface{}{"delete": true}})
 			} else if state == -1 {
-				if pTimeStartStr <= publishtime { //最近几天未下成功的数据状态重置
+				if pTimeStartStr1 <= publishtime && publishtime <= pTimeStartStr2 { //最近几天未下成功的数据状态重置
 					update = append(update, map[string]interface{}{"$set": map[string]interface{}{"times": 0, "state": 0}})
 				} else { //非最近几天下载失败的数据不再下载,进行迁移
 					save = append(save, tmp)
@@ -2549,7 +2494,7 @@ func CloseTask() {
 			"i_state": 6,
 		},
 	}
-	util.MgoEB.Update("task", query, set, false, true)
+	util.MgoEB.Update("task_back", query, set, false, true)
 	logger.Debug("---清理未更新任务完毕---")
 }
 
@@ -2583,17 +2528,17 @@ func SaveCodeInfo() {
 				logger.Debug("Json UnMarshal Error", code)
 				return
 			}
-			if sp.Platform == "golua平台" {
-				//列表页总下载量
-				atomic.AddInt64(&LuaListDownloadAllNum, int64(sp.RepeatDownloadAllNum))
-				//列表页总下载成功量
-				atomic.AddInt64(&LuaListDownloadSuccessAllNum, int64(sp.RepeatDownloadSuccessNum))
-			} else {
-				//列表页总下载量
-				atomic.AddInt64(&PythonListDownloadAllNum, int64(sp.RepeatDownloadAllNum))
-				//列表页总下载成功量
-				atomic.AddInt64(&PythonListDownloadSuccessAllNum, int64(sp.RepeatDownloadSuccessNum))
-			}
+			//if sp.Platform == "golua平台" {
+			//	//列表页总下载量
+			//	atomic.AddInt64(&LuaListDownloadAllNum, int64(sp.RepeatDownloadAllNum))
+			//	//列表页总下载成功量
+			//	atomic.AddInt64(&LuaListDownloadSuccessAllNum, int64(sp.RepeatDownloadSuccessNum))
+			//} else {
+			//	//列表页总下载量
+			//	atomic.AddInt64(&PythonListDownloadAllNum, int64(sp.RepeatDownloadAllNum))
+			//	//列表页总下载成功量
+			//	atomic.AddInt64(&PythonListDownloadSuccessAllNum, int64(sp.RepeatDownloadSuccessNum))
+			//}
 			lock.Lock()
 			if len(arr) > 500 {
 				util.MgoEB.SaveBulk("luacodeinfo_back", arr...)

+ 10 - 4
src/timetask/luamove.go

@@ -1,8 +1,8 @@
 package timetask
 
 import (
-	"encoding/json"
 	"github.com/donnie4w/go-logger/logger"
+	"go.mongodb.org/mongo-driver/bson"
 	qu "qfw/util"
 	"sort"
 	"sync"
@@ -21,6 +21,7 @@ type Spider struct {
 	ToEvent      int    `json:"toevent"`      //目标节点
 	DataNum      int    `json:"datanum"`      //采集量
 	PtimeDataNum int    `json:"ptimedatanum"` //按发布时间统计的采集量
+	Platform     string `json:"platform"`
 	//Average      int    `json:"average"`      //平均值
 	IsMove     bool  `json:"ismove"` //是否转移节点
 	State      int   `json:"state"`
@@ -66,7 +67,9 @@ func GetLuaInfo() {
 	wg := &sync.WaitGroup{}
 	ch := make(chan bool, 5)
 	query := map[string]interface{}{
-		"platform": "golua平台",
+		"platform": map[string]interface{}{
+			"$in": []string{"golua平台", "chrome"},
+		},
 		"state": map[string]interface{}{
 			"$in": []int{0, 1, 2, 5}, //待完成、待审核、未通过、已上架
 		},
@@ -80,6 +83,7 @@ func GetLuaInfo() {
 		"site":         1,
 		"channel":      1,
 		"param_common": 1,
+		"platform":     1,
 	}
 	count := util.MgoEB.Count("luaconfig", query)
 	logger.Debug("共加载线上爬虫个数:", count)
@@ -94,6 +98,7 @@ func GetLuaInfo() {
 				wg.Done()
 			}()
 			code := qu.ObjToString(tmp["code"])
+			platform := qu.ObjToString(tmp["platform"])
 			site := qu.ObjToString(tmp["site"])
 			channel := qu.ObjToString(tmp["channel"])
 			event := qu.IntAll(tmp["event"])
@@ -127,6 +132,7 @@ func GetLuaInfo() {
 				MaxPage:    maxPage,
 				CycleTime:  cycletime,
 				FromEvent:  event,
+				Platform:   platform,
 				Comeintime: time.Now().Unix(),
 			}
 			lock.Unlock()
@@ -251,13 +257,13 @@ func GetMoveLua() {
 			sp.ToEvent = EventArrType4[0].Event
 		}
 		//存储爬虫统计信息
-		byteText, err := json.Marshal(sp)
+		byteText, err := bson.Marshal(sp)
 		if err != nil {
 			logger.Debug("Json Marshal Error", sp.Code)
 			continue
 		}
 		tmp := map[string]interface{}{}
-		if json.Unmarshal(byteText, &tmp) == nil {
+		if bson.Unmarshal(byteText, &tmp) == nil {
 			save = append(save, tmp)
 			if len(save) >= 1000 {
 				util.MgoEB.SaveBulk("luamovevent", save...)

+ 14 - 6
src/timetask/report.go

@@ -97,7 +97,9 @@ func SpiderWeeklyReport() {
 			"$gte": sTime,
 			"$lt":  eTime,
 		},
-		"platform": "golua平台",
+		"platform": map[string]interface{}{
+			"$in": []string{"golua平台", "chrome"},
+		},
 	}
 	ThisWeekAddLuaNum := util.MgoEB.Count("luaconfig", query)
 	qu.Debug("lua本周新建爬虫数量:", ThisWeekAddLuaNum)
@@ -128,8 +130,10 @@ func SpiderWeeklyReport() {
 	qu.Debug("lua完成新建爬虫同比增减:", FinishLuaIncDecRatio)
 	//待完成新建爬虫总数
 	query = map[string]interface{}{
-		"event":    7000,
-		"platform": "golua平台",
+		"event": 7000,
+		"platform": map[string]interface{}{
+			"$in": []string{"golua平台", "chrome"},
+		},
 		"state": map[string]interface{}{
 			"$lte": 2, //待完成、待审核、未通过
 		},
@@ -238,7 +242,9 @@ func SpiderWeeklyReport() {
 				},
 			},
 			map[string]interface{}{
-				"platform": "golua平台",
+				"platform": map[string]interface{}{
+					"$in": []string{"golua平台", "chrome"},
+				},
 				"state": map[string]interface{}{
 					"$in": []int{8, 9}, //需登录、转python状态的爬虫,记录在python待完成爬虫数量中
 				},
@@ -616,8 +622,10 @@ func SpiderWeeklyReportForLua() {
 	newCount := util.MgoEB.Count("lua_logs_auditor_new", q)
 	//待完成爬虫数量=待开发新爬虫+待维护历史任务
 	q1 := map[string]interface{}{
-		"event":    7000,
-		"platform": "golua平台",
+		"event": 7000,
+		"platform": map[string]interface{}{
+			"$in": []string{"golua平台", "chrome"},
+		},
 		"state": map[string]interface{}{
 			"$lte": 2, //待完成、待审核、未通过
 		},

+ 251 - 0
src/timetask/summary.go

@@ -1,12 +1,263 @@
 package timetask
 
 import (
+	"bytes"
+	"fmt"
+	"github.com/donnie4w/go-logger/logger"
+	"net/http"
 	qu "qfw/util"
 	"sync"
+	"sync/atomic"
 	"time"
 	"util"
 )
 
+var (
+	CodePlatformMap                 map[string]string
+	LuaListDownloadAllNum           int64
+	LuaListDownloadSuccessAllNum    int64
+	LuaBiddingDownloadAllNum        int64
+	PythonListDownloadAllNum        int64
+	PythonListDownloadSuccessAllNum int64
+	PythonBiddingDownloadAllNum     int64
+	Publishtime                     string
+)
+
+var LuaPythonNumModel = `{
+    "msgtype": "text",
+    "text": {
+		"content": "%s"
+	}
+}`
+var MarkdownModel = `{
+    "msgtype": "markdown",
+    "markdown": {
+        "content": "%s"
+    }
+}`
+var NumContentModel = `
+     >平台:<font color=\"warning\">%s</font>
+     >列表页采集量:<font color=\"warning\">%d</font>
+     >列表页采集成功量:<font color=\"warning\">%d</font>\n
+     >Bidding成功量:<font color=\"warning\">%d</font>\n
+`
+
+//每日采集量统计
+func CountLuaPythonNumEveryDay() {
+	//lua python每日采集量统计
+	CodePlatformMap = map[string]string{}
+	startTime := util.GetTime(-1)
+	Publishtime = qu.FormatDateByInt64(&startTime, qu.Date_Short_Layout)
+	//重置
+	LuaListDownloadAllNum = 0
+	LuaListDownloadSuccessAllNum = 0
+	LuaBiddingDownloadAllNum = 0
+	PythonListDownloadAllNum = 0
+	PythonListDownloadSuccessAllNum = 0
+	PythonBiddingDownloadAllNum = 0
+	GetCodePlatform() //爬虫所有平台
+	GetBiddingCount() //统计bidding表爬虫采集量
+	GetPythonListDownloadNum()
+	GetLuaListDownloadNum()
+	SendLuaPythonAllNum()
+}
+func GetCodePlatform() {
+	defer qu.Catch()
+	sess := util.MgoEB.GetMgoConn()
+	defer util.MgoEB.DestoryMongoConn(sess)
+	lock := &sync.Mutex{}
+	wg := &sync.WaitGroup{}
+	ch := make(chan bool, 5)
+	query := map[string]interface{}{}
+	fields := map[string]interface{}{
+		"platform": 1,
+		"code":     1,
+	}
+	it := sess.DB(util.MgoEB.DbName).C("luaconfig").Find(&query).Select(&fields).Iter()
+	n := 0
+	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
+		wg.Add(1)
+		ch <- true
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			platform := qu.ObjToString(tmp["platform"])
+			code := qu.ObjToString(tmp["code"])
+			lock.Lock()
+			CodePlatformMap[code] = platform
+			lock.Unlock()
+		}(tmp)
+		if n%1000 == 0 {
+			logger.Debug(n)
+		}
+		tmp = map[string]interface{}{}
+	}
+	wg.Wait()
+	logger.Debug("爬虫所属平台信息准备完成...", len(CodePlatformMap))
+}
+
+func GetBiddingCount() {
+	defer qu.Catch()
+	sess := util.MgoB.GetMgoConn()
+	defer util.MgoB.DestoryMongoConn(sess)
+	//lock := &sync.Mutex{}
+	wg := &sync.WaitGroup{}
+	ch := make(chan bool, 5)
+	query := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$gte": util.GetTime(-1),
+			"$lt":  util.GetTime(0),
+		},
+	}
+	fieles := map[string]interface{}{
+		"spidercode": 1,
+	}
+	count := util.MgoB.Count("bidding", query)
+	logger.Debug("bidding采集数据量:", count)
+	it := sess.DB(util.MgoB.DbName).C("bidding").Find(&query).Select(&fieles).Iter()
+	n := 0
+	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
+		wg.Add(1)
+		ch <- true
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			code := qu.ObjToString(tmp["spidercode"])
+			platform := CodePlatformMap[code]
+			if platform == "golua平台" || platform == "chrome" {
+				atomic.AddInt64(&LuaBiddingDownloadAllNum, 1)
+			} else if platform == "python" {
+				atomic.AddInt64(&PythonBiddingDownloadAllNum, 1)
+			} else {
+				atomic.AddInt64(&PythonBiddingDownloadAllNum, 1)
+				qu.Debug(code)
+			}
+		}(tmp)
+		if n%10000 == 0 {
+			logger.Debug(n)
+		}
+		tmp = map[string]interface{}{}
+	}
+	wg.Wait()
+	logger.Debug("Bidding数据量统计完成...", LuaBiddingDownloadAllNum, PythonBiddingDownloadAllNum)
+}
+
+//python统计列表页采集量
+func GetPythonListDownloadNum() {
+	defer qu.Catch()
+	logger.Debug("python列表页数据下载量统计开始...")
+	sess := util.MgoPy.GetMgoConn()
+	defer util.MgoPy.DestoryMongoConn(sess)
+	query := map[string]interface{}{
+		"runtime": Publishtime,
+		"rel_count": map[string]interface{}{
+			"$gt": 0,
+		},
+	}
+	fields := map[string]interface{}{
+		"rel_count": 1,
+	}
+	wg := &sync.WaitGroup{}
+	ch := make(chan bool, 5)
+	it := sess.DB(util.MgoPy.DbName).C("list").Find(&query).Select(&fields).Iter()
+	n := 0
+	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
+		wg.Add(1)
+		ch <- true
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			count := qu.IntAll(tmp["rel_count"])
+			atomic.AddInt64(&PythonListDownloadAllNum, int64(count))
+		}(tmp)
+		if n%1000 == 0 {
+			logger.Debug(n)
+		}
+		tmp = map[string]interface{}{}
+	}
+	wg.Wait()
+	queryAll := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$gte": util.GetTime(-1),
+			"$lt":  util.GetTime(0),
+		},
+	}
+	count := util.MgoPy.Count("data_bak", queryAll)
+	PythonListDownloadSuccessAllNum = int64(count)
+	qu.Debug("python列表页采集量:", PythonListDownloadAllNum, "采集成功量:", PythonListDownloadSuccessAllNum)
+}
+
+//lua统计列表页采集量
+func GetLuaListDownloadNum() {
+	queryAll := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$gte": util.GetTime(-1),
+			"$lt":  util.GetTime(0),
+		},
+	}
+	querySuccess := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$gte": util.GetTime(-1),
+			"$lt":  util.GetTime(0),
+		},
+		"state": 1,
+	}
+	//spider_highlistdata
+	allNum1 := util.MgoS.Count("spider_highlistdata", queryAll)
+	successNum1 := util.MgoS.Count("spider_highlistdata", querySuccess)
+	qu.Debug("spider_highlistdata", allNum1, successNum1)
+	//spider_listdata
+	allNum2 := util.MgoS.Count("spider_listdata", queryAll)
+	successNum2 := util.MgoS.Count("spider_listdata", querySuccess)
+	qu.Debug("spider_listdata", allNum2, successNum2)
+	//spider_historydata
+	allNum3 := util.MgoS.Count("spider_historydata", queryAll)
+	successNum3 := util.MgoS.Count("spider_historydata", querySuccess)
+	qu.Debug("spider_historydata", allNum3, successNum3)
+	//spider_historydata_back
+	allNum4 := util.MgoS.Count("spider_historydata_back", queryAll)
+	successNum4 := util.MgoS.Count("spider_historydata_back", querySuccess)
+	qu.Debug("spider_historydata_back", allNum4, successNum4)
+	LuaListDownloadAllNum = int64(allNum1) + int64(allNum2) + int64(allNum3) + int64(allNum4)
+	LuaListDownloadSuccessAllNum = int64(successNum1) + int64(successNum2) + int64(successNum3) + int64(successNum4)
+	qu.Debug("lua列表页采集量:", LuaListDownloadAllNum, "采集成功量:", LuaListDownloadSuccessAllNum)
+}
+
+func SendLuaPythonAllNum() {
+	defer qu.Catch()
+	luaContent := fmt.Sprintf(NumContentModel, "Lua", LuaListDownloadAllNum, LuaListDownloadSuccessAllNum, LuaBiddingDownloadAllNum)
+	pythonContent := fmt.Sprintf(NumContentModel, "python", PythonListDownloadAllNum, PythonListDownloadSuccessAllNum, PythonBiddingDownloadAllNum)
+	resultContent := fmt.Sprintf(MarkdownModel, Publishtime+",Lua、Python各维度采集量统计结果如下:\n"+luaContent+pythonContent)
+	qu.Debug(resultContent)
+	//保存记录
+	util.MgoS.Save("spider_luapythoncount", map[string]interface{}{
+		"lualistnum":           LuaListDownloadAllNum,
+		"lualistsuccessnum":    LuaListDownloadSuccessAllNum,
+		"luabiddingnum":        LuaBiddingDownloadAllNum,
+		"pythonlistnum":        PythonListDownloadAllNum,
+		"pythonlistsuccessnum": PythonListDownloadSuccessAllNum,
+		"pythonbiddingnum":     PythonBiddingDownloadAllNum,
+		"comeintime":           time.Now().Unix(),
+		"date":                 Publishtime,
+	})
+	//发送统计
+	resp, err := http.Post(
+		"https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=97850772-88d0-4544-a2c3-6201aeddff9e",
+		"application/json",
+		bytes.NewBuffer([]byte(resultContent)),
+	)
+	if err != nil {
+		fmt.Println("request error:", err)
+		return
+	}
+	defer resp.Body.Close()
+}
 func SummaryCode() {
 	defer qu.Catch()
 	qu.Debug("上架爬虫信息汇总开始...")

+ 11 - 8
src/timetask/wxworkwarn.go

@@ -60,12 +60,13 @@ var PythonUserContentModel = `
      >待完成爬虫:<font color=\"warning\">%d个</font><font color=\"info\"></font>
      >未通过爬虫:<font color=\"warning\">%d个</font><font color=\"info\"></font>\n
 `
-var MarkdownModel = `{
-    "msgtype": "markdown",
-    "markdown": {
-        "content": "%s"
-    }
-}`
+
+//var MarkdownModel = `{
+//    "msgtype": "markdown",
+//    "markdown": {
+//        "content": "%s"
+//    }
+//}`
 var TextModel = `{
     "msgtype": "text",
     "text": {
@@ -361,8 +362,10 @@ func SendInfoToWxWork_ToAuditor() {
 	qu.Debug(LuaAuditorInfoMap)
 	for eu, userInfo := range LuaAuditorInfoMap {
 		query := map[string]interface{}{
-			"state":    1,
-			"platform": "golua平台",
+			"state": 1,
+			"platform": map[string]interface{}{
+				"$in": []string{"golua平台", "chrome"},
+			},
 		}
 		if eu == "wangdanting" { //王丹婷审核的施顺才的爬虫
 			query["modifyuser"] = map[string]interface{}{

+ 7 - 1
src/util/config.go

@@ -43,7 +43,7 @@ var (
 	CodeEventType              map[int]int   //节点对应的不同类型的采集频率
 	GMail                      *gm.GmailAuth //邮件信息
 	To                         string        //邮件接收人
-
+	CreateTaskInfoFormat       map[int]bool  //需要创建任务的爬虫infoformat类型
 )
 var TimeReg = regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2}")
 
@@ -153,4 +153,10 @@ func InitOther() {
 		ReTry:    qu.IntAll(mail["retry"]),
 	}
 	To = qu.ObjToString(mail["to"])
+
+	CreateTaskInfoFormat = map[int]bool{}
+	for infoformat, b := range Config["infoformat"].(map[string]interface{}) {
+		CreateTaskInfoFormat[qu.IntAll(infoformat)] = b.(bool)
+	}
+	qu.Debug("CreateTaskInfoFormat:", CreateTaskInfoFormat)
 }