|
@@ -310,6 +310,7 @@ func GetDTPErrData() {
|
|
|
"site": 1,
|
|
|
"channel": 1,
|
|
|
"field": 1,
|
|
|
+ "info": 1,
|
|
|
}
|
|
|
query := map[string]interface{}{
|
|
|
"comeintime": map[string]interface{}{
|
|
@@ -330,17 +331,22 @@ func GetDTPErrData() {
|
|
|
<-ch
|
|
|
wg.Done()
|
|
|
}()
|
|
|
- code := qu.ObjToString(tmp["code"])
|
|
|
- href := qu.ObjToString(tmp["href"])
|
|
|
- site := qu.ObjToString(tmp["site"])
|
|
|
- channel := qu.ObjToString(tmp["channel"])
|
|
|
- field := qu.ObjToString(tmp["field"])
|
|
|
errnum := "3" //detail、 title异常
|
|
|
destmp := "正文标题异常:\n"
|
|
|
+ field := qu.ObjToString(tmp["field"])
|
|
|
+ info := qu.ObjToString(tmp["info"])
|
|
|
if field == "publishtime" { //发布时间异常
|
|
|
+ if info == "Publishtime Is Too Late" { //发布时间超前的不建任务
|
|
|
+ return
|
|
|
+ }
|
|
|
errnum = "4"
|
|
|
destmp = "发布时间异常:\n"
|
|
|
}
|
|
|
+ code := qu.ObjToString(tmp["code"])
|
|
|
+ href := qu.ObjToString(tmp["href"])
|
|
|
+ site := qu.ObjToString(tmp["site"])
|
|
|
+ channel := qu.ObjToString(tmp["channel"])
|
|
|
+
|
|
|
lock.Lock()
|
|
|
if t := TaskMap[code]; t != nil {
|
|
|
if info := t.ErrInfo[errnum]; info != nil {
|
|
@@ -409,6 +415,11 @@ func GetStatusCodeErrorData() {
|
|
|
logger.Debug("共有404地址", len(*list), "条")
|
|
|
for _, tmp := range *list {
|
|
|
code := qu.ObjToString(tmp["code"])
|
|
|
+ one, _ := MgoE.FindOneByField("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"state": 1})
|
|
|
+ state := qu.IntAll((*one)["state"])
|
|
|
+ if state == 4 || state > 6 {
|
|
|
+ continue
|
|
|
+ }
|
|
|
//判断3天内是否有采集数据,有则不建404任务
|
|
|
stime, etime := GetTime(-3), GetTime(0)
|
|
|
q := map[string]interface{}{
|
|
@@ -562,7 +573,17 @@ func SaveResult() {
|
|
|
<-ch
|
|
|
wg.Done()
|
|
|
}()
|
|
|
- if YearMinCodeMap[t.Code] { //luayearmincode中爬虫任务删除
|
|
|
+ delYearMinCode := false
|
|
|
+ if errInfo := t.ErrInfo; errInfo != nil {
|
|
|
+ //爬虫任务为下载异常、运行异常、404、时间异常、数据异常任务时,不再建该爬虫的抽查任务
|
|
|
+ if len(errInfo) >= 2 || (len(errInfo) == 1 && errInfo["1"] == nil) { //不是数量异常任务
|
|
|
+ delYearMinCode = true
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if delYearMinCode {
|
|
|
+ delete(YearMinCodeMap, t.Code)
|
|
|
+ go MgoE.Update("luayearmincode", map[string]interface{}{"code": t.Code}, map[string]interface{}{"$set": map[string]interface{}{"send": true}}, false, false)
|
|
|
+ } else if YearMinCodeMap[t.Code] { //luayearmincode中爬虫任务删除
|
|
|
return
|
|
|
}
|
|
|
result := map[string]interface{}{}
|