maxiaoshan 2 vuotta sitten
vanhempi
commit
d2e91a58dc
3 muutettua tiedostoa, joissa 84 lisäystä ja 79 poistoa
  1. 1 1
      src/luatask/task.go
  2. 67 64
      src/timetask/random.go
  3. 16 14
      src/timetask/report.go

+ 1 - 1
src/luatask/task.go

@@ -1067,7 +1067,7 @@ func GetSpiderWarnErrData() {
 				wg.Done()
 			}()
 			infoText := qu.ObjToString(tmp["info"])
-			level := qu.IntAll("level")
+			level := qu.IntAll(tmp["level"])
 			stype := "" //记录异常类型
 			if level == 2 {
 				if infoText == "Field Value Not Contains Chinese" { //title、detail不含中文(数据分析处理每日异常数据)

+ 67 - 64
src/timetask/random.go

@@ -23,11 +23,14 @@ type WarnInfo struct {
 	Repeat   bool
 }
 
-var StypeArr = []string{
-	"Field Value Is Null",
-	"Field Value Contains Random Code",
-	"Field Value Not Contains Chinese",
-	"Detail File Err",
+var ErrorStypeMap = map[string]int{
+	"Field Value Is Null":              2,
+	"Field Value Contains Random Code": 2,
+	"Field Value Not Contains Chinese": 2,
+	"Detail File Err":                  2,
+}
+var WarnStypeMap = map[string]int{
+	"Attachment Upload Failed": 1,
 }
 var httpReg = regexp.MustCompile(`^(http|https).*`)
 
@@ -153,21 +156,18 @@ func GetSpiderWarnData() {
 	defer util.MgoS.DestoryMongoConn(sess)
 	stime := util.GetTime(-1)
 	etime := util.GetTime(0)
-	//if time.Now().Weekday().String() == "Monday" {
-	//	stime = util.GetTime(-3)
-	//}
 	query := map[string]interface{}{
 		"comeintime": map[string]interface{}{
 			"$gte": stime,
 			"$lt":  etime,
 		},
-		"info": map[string]interface{}{ //保存服务更新后这个条件可去掉2022-11-28
-			"$in": StypeArr,
-		},
-		"level": 2,
+		//"info": map[string]interface{}{ //保存服务更新后这个条件可去掉2022-11-28
+		//	"$in": StypeArr,
+		//},
+		//"level": 2,
 	}
 	invalidDate := time.Now().AddDate(-2, 0, 0).Unix()
-	ch := make(chan bool, 2)
+	ch := make(chan bool, 3)
 	wg := &sync.WaitGroup{}
 	lock := &sync.Mutex{}
 	result := map[string]*WarnInfo{}
@@ -181,55 +181,58 @@ func GetSpiderWarnData() {
 				<-ch
 				wg.Done()
 			}()
-			publishtime := int64(0)
-			data, ok := tmp["data"].(map[string]interface{})
-			if ok {
-				if ptime := data["publishtime"]; ptime != nil {
-					publishtime = qu.Int64All(ptime)
-					if publishtime > 0 && publishtime < invalidDate { //两年前的历史数据不再推送修改
-						return
-					}
-				}
-			}
 			info := qu.ObjToString(tmp["info"])
-			if info == "Detail File Err" { //正文是链接的,进行链接判重
-				hrefDetail := httpReg.FindString(qu.ObjToString(data["detail"]))
-				if hrefDetail != "" {
-					esQuery := `{"query": {"bool": {"must": [{"term": {"href": "` + hrefDetail + `"}}]}}}`
-					if util.Es.Count(util.EsIndex, util.EsType, esQuery) >= 1 {
-						return
+			level := qu.IntAll(tmp["level"])
+			//指定的错误类型和级别匹配的数据,推送spider_warn_err
+			if ErrorStypeMap[info] == level || WarnStypeMap[info] == level {
+				publishtime := int64(0)
+				data, ok := tmp["data"].(map[string]interface{})
+				if ok {
+					if ptime := data["publishtime"]; ptime != nil {
+						publishtime = qu.Int64All(ptime)
+						if publishtime > 0 && publishtime < invalidDate { //两年前的历史数据不再推送修改
+							return
+						}
 					}
 				}
-			}
-			href := qu.ObjToString(tmp["href"])
-			level := qu.IntAll(tmp["level"])
-			field := qu.ObjToString(tmp["field"])
-			title := qu.ObjToString(tmp["title"])
-			//数据验证,是否有title一致,相似publishtime的数据,视为一样的数据,不需要再修复
-			repeat := RepeatData(title, publishtime)
-			lock.Lock()
-			if warnInfo := result[href]; warnInfo == nil {
-				warnInfo = &WarnInfo{
-					Fields:   map[string]bool{field: true},
-					MaxLevel: level,
-					Data:     data,
-					Site:     tmp["site"],
-					Channel:  tmp["channel"],
-					Title:    title,
-					Infos:    map[string]bool{info: true},
-					Code:     tmp["code"],
-					Href:     href,
-					Repeat:   repeat,
+				if info == "Detail File Err" { //正文是链接的,进行链接判重
+					hrefDetail := httpReg.FindString(qu.ObjToString(data["detail"]))
+					if hrefDetail != "" {
+						esQuery := `{"query": {"bool": {"must": [{"term": {"href": "` + hrefDetail + `"}}]}}}`
+						if util.Es.Count(util.EsIndex, util.EsType, esQuery) >= 1 {
+							return
+						}
+					}
 				}
-				result[href] = warnInfo
-			} else {
-				warnInfo.Fields[field] = true
-				warnInfo.Infos[info] = true
-				if warnInfo.MaxLevel < level {
-					warnInfo.MaxLevel = level
+				href := qu.ObjToString(tmp["href"])
+				field := qu.ObjToString(tmp["field"])
+				title := qu.ObjToString(tmp["title"])
+				//数据验证,是否有title一致,相似publishtime的数据,视为一样的数据,不需要再修复
+				repeat := RepeatData(title, publishtime)
+				lock.Lock()
+				if warnInfo := result[href]; warnInfo == nil {
+					warnInfo = &WarnInfo{
+						Fields:   map[string]bool{field: true},
+						MaxLevel: level,
+						Data:     data,
+						Site:     tmp["site"],
+						Channel:  tmp["channel"],
+						Title:    title,
+						Infos:    map[string]bool{info: true},
+						Code:     tmp["code"],
+						Href:     href,
+						Repeat:   repeat,
+					}
+					result[href] = warnInfo
+				} else {
+					warnInfo.Fields[field] = true
+					warnInfo.Infos[info] = true
+					if warnInfo.MaxLevel < level {
+						warnInfo.MaxLevel = level
+					}
 				}
+				lock.Unlock()
 			}
-			lock.Unlock()
 		}(tmp)
 		if n%1000 == 0 {
 			qu.Debug("current:", n)
@@ -285,14 +288,14 @@ func GetSpiderWarnData() {
 }
 
 func RepeatData(title string, publishtime int64) bool {
-	return util.MgoB.Count("bidding",
-		map[string]interface{}{
-			"title": title,
-			"publishtime": map[string]interface{}{
-				"$gte": publishtime + 86400*3,
-				"$lte": publishtime - 86400*3,
-			},
-		}) > 0
+	q := map[string]interface{}{
+		"title": title,
+		"publishtime": map[string]interface{}{
+			"$lte": publishtime + 86400*3,
+			"$gte": publishtime - 86400*3,
+		},
+	}
+	return util.MgoB.Count("bidding", q) > 0
 }
 
 /*

+ 16 - 14
src/timetask/report.go

@@ -22,6 +22,7 @@ func SpiderWeeklyReport() {
 	sTime := util.GetTime(-7)
 	qu.Debug(sTime, eTime)
 	//上周统计信息
+
 	LastWeekAddTaskAllNum, //上周新建任务数
 		LastWeekAddLuaNum,                          //lua上周新增爬虫数
 		LastWeekAddEffectTaskNum,                   //上周新增有效任务数
@@ -522,20 +523,21 @@ func GetLastWeekReport() (int, int, int, int, int, int, int, int, int, int, int,
 	datas, _ := util.MgoS.Find("spider_weeklyreport", nil, map[string]interface{}{"_id": -1}, nil, true, 0, 1)
 	if len(*datas) == 1 {
 		data := (*datas)[0]
-		return qu.IntAll(data["addtasknum"]),
-			qu.IntAll(data["addluanum"]),
-			qu.IntAll(data["addeffecttasknum"]),
-			qu.IntAll(data["addpythonnum"]),
-			qu.IntAll(data["checktasknum"]),
-			qu.IntAll(data["finishluanum"]),
-			qu.IntAll(data["finisheffecttasknum"]),
-			qu.IntAll(data["finishpythonnum"]),
-			qu.IntAll(data["allcodenum"]),
-			qu.IntAll(data["allsitenum"]),
-			qu.IntAll(data["mgonum"]),
-			qu.IntAll(data["esnum"]),
-			qu.IntAll(data["competemgonum"]),
-			qu.IntAll(data["competeesnum"])
+		addtasknum := qu.IntAll(data["addtasknum"])
+		addluanum := qu.IntAll(data["addluanum"])
+		addeffecttasknum := qu.IntAll(data["addeffecttasknum"])
+		addpythonnum := qu.IntAll(data["addpythonnum"])
+		checktasknum := qu.IntAll(data["checktasknum"])
+		finishluanum := qu.IntAll(data["finishluanum"])
+		finisheffecttasknum := qu.IntAll(data["finisheffecttasknum"])
+		finishpythonnum := qu.IntAll(data["finishpythonnum"])
+		allcodenum := qu.IntAll(data["allcodenum"])
+		allsitenum := qu.IntAll(data["allsitenum"])
+		mgonum := qu.IntAll(data["mgonum"])
+		esnum := qu.IntAll(data["esnum"])
+		competemgonum := qu.IntAll(data["competemgonum"])
+		competeesnum := qu.IntAll(data["competeesnum"])
+		return addtasknum, addluanum, addeffecttasknum, addpythonnum, checktasknum, finishluanum, finisheffecttasknum, finishpythonnum, allcodenum, allsitenum, mgonum, esnum, competemgonum, competeesnum
 	} else {
 		qu.Debug("历史周报信息查询失败")
 	}