Browse Source

任务逻辑修改

maxiaoshan 2 years ago
parent
commit
e0ed852328
1 changed files with 52 additions and 7 deletions
  1. 52 7
      src/luatask/newtask.go

+ 52 - 7
src/luatask/newtask.go

@@ -88,6 +88,7 @@ func NewStartTask() {
 	getLuaSummaryInfo()    //获取lua汇总信息
 	getSpiderWarnInfo()    //获取异常数据
 	saveCodeInfo()         //汇总异常信息,产出任务
+	closeTask()
 }
 
 func getCodeBaseInfo() {
@@ -246,7 +247,14 @@ func getSpiderWarnInfo() {
 		},
 	}
 	fields := map[string]interface{}{
-		"data": 0,
+		"field":                 1,
+		"level":                 1,
+		"info":                  1,
+		"code":                  1,
+		"infotype":              1,
+		"href":                  1,
+		"data.publishtime":      1,
+		"data.l_np_publishtime": 1,
 	}
 	it := sess.DB(util.MgoS.DbName).C("spider_warn").Find(&query).Select(&fields).Iter()
 	n := 0
@@ -272,6 +280,19 @@ func getSpiderWarnInfo() {
 			} else if infotype == 8 && field == "projectinfo" {
 				return
 			}
+			if infotype == 2 || infotype == 6 || infotype == 8 {
+				if data, ok := tmp["data"].(map[string]interface{}); ok {
+					var ptime int64
+					if l_np_publishtime := data["l_np_publishtime"]; l_np_publishtime != nil {
+						ptime = qu.Int64All(l_np_publishtime)
+					} else if publishtime := data["publishtime"]; publishtime != nil {
+						ptime = qu.Int64All(publishtime)
+					}
+					if ptime < time.Now().AddDate(0, -6, 0).Unix() { //半年内的异常数据有效
+						return
+					}
+				}
+			}
 			code := qu.ObjToString(tmp["code"])
 			info := qu.ObjToString(tmp["info"])
 			href := qu.ObjToString(tmp["href"])
@@ -330,11 +351,11 @@ func getSpiderHeart() {
 			findListHeart := qu.Int64All(tmp["findlist"])
 			lock.Lock()
 			if sp := NewCodeInfoMap[code]; sp != nil {
-				limitDayNum := 0
-				if sp.Event == 7520 { //由于7520节点爬虫循环一轮的时间较长,心跳有可能仍是前一天的
-					limitDayNum = -1
-				}
-				sp.List_IsGetData = findListHeart > util.GetTime(limitDayNum)
+				//limitDayNum := 0
+				//if sp.Event == 7520 { //由于7520节点爬虫循环一轮的时间较长,心跳有可能仍是前一天的
+				//	limitDayNum = -1
+				//}
+				sp.List_IsGetData = findListHeart > util.GetTime(0)-int64(12*3600) //前一天12点
 			}
 			lock.Unlock()
 		}(tmp)
@@ -696,7 +717,11 @@ func listErr(sp *NewSpider) {
 			//}
 			sp.ErrType = qu.IntAll(NEWTASK_LISTERR)
 			sp.ErrTypeMap[qu.IntAll(NEWTASK_LISTERR)] = true
-			sp.ErrDescription += "列表页异常:\n	列表页共采集" + fmt.Sprint(sp.List_RunTimes) + "轮,其中有" + fmt.Sprint(sp.List_NoDataTimes) + "轮无数据\n"
+			if !sp.List_IsGetData {
+				sp.ErrDescription += "列表页异常:\n	无最新心跳\n"
+			} else if sp.List_RunTimes == 0 {
+				sp.ErrDescription += "列表页异常:\n	列表页共采集" + fmt.Sprint(sp.List_RunTimes) + "轮,其中有" + fmt.Sprint(sp.List_NoDataTimes) + "轮无数据\n"
+			}
 		}
 	}
 }
@@ -921,6 +946,26 @@ func (sp *NewSpider) getErrHrefs(coll, errType string, query map[string]interfac
 	return
 }
 
+// closeTask stamps l_closetime with the current Unix time on every "newtask" document matched by the stale-task query built below.
+func closeTask() {
+	defer qu.Catch() // presumably recovers from a panic so the caller keeps running; qu.Catch is defined outside this view — confirm
+	query := map[string]interface{}{ // close download-error and data-error warning tasks that have not been moved to "pending" within 7 days
+		"l_comeintime": map[string]interface{}{
+			"$lte": util.GetTime(-7), // presumably the timestamp of 7 days ago; util.GetTime is not visible here — confirm its day boundary
+		},
+		"i_state": 0, // only tasks still in state 0 are matched
+		"s_type": map[string]interface{}{
+			"$in": []string{"5", "6"}, // task types are compared as strings, not integers
+		},
+	}
+	set := map[string]interface{}{
+		"$set": map[string]interface{}{
+			"l_closetime": time.Now().Unix(), // NOTE(review): only l_closetime is written; i_state stays 0, so the same documents match again on the next run and get re-stamped — confirm this is intended
+		},
+	}
+	util.MgoEB.Update("newtask", query, set, false, true) // NOTE(review): trailing args look like upsert=false, multi=true — confirm against the util Update signature; the result is ignored
+}
+
 /*
 	1、列表页统计的是当天心跳,提前告警。如果当天心跳有问题呢?
 	2、下载异常由于原网站详情页无信息造成的,如何提高任务准确率?