|
@@ -10,12 +10,13 @@ import (
|
|
|
"util"
|
|
|
)
|
|
|
|
|
|
-const NEWTASK_LISTERR, NEWTASK_DATAINFOERR, NEWTASK_RATEERR, NEWTASK_DOWNLOADERR, NEWTASK_DATAINFOWARN = "1", "2", "4", "5", "6"
|
|
|
+const NEWTASK_LISTERR, NEWTASK_DATAINFOERR, NEWTASK_PAGEFLIPERR, NEWTASK_RATEERR, NEWTASK_DOWNLOADERR, NEWTASK_DATAINFOWARN = "1", "2", "3", "4", "5", "6" // task error-type codes (keys of LuaErrTypeInfo); compared as strings, a smaller code is treated as higher priority
|
|
|
|
|
|
var NewCodeInfoMap = map[string]*NewSpider{} // per-spider state looked up by a code string (NewCodeInfoMap[code]); presumably the key is the spider code — confirm
|
|
|
var LuaErrTypeInfo = map[string]string{
|
|
|
NEWTASK_LISTERR: "列表页异常",
|
|
|
NEWTASK_DATAINFOERR: "数据异常错误",
|
|
|
+ NEWTASK_PAGEFLIPERR: "爬虫翻页异常",
|
|
|
NEWTASK_RATEERR: "采集频率异常",
|
|
|
NEWTASK_DOWNLOADERR: "下载异常",
|
|
|
NEWTASK_DATAINFOWARN: "数据异常警告",
|
|
@@ -44,6 +45,7 @@ type NewSpider struct {
|
|
|
Channel string `bson:"channel"`
|
|
|
Platform string `bson:"platform"`
|
|
|
Event int `bson:"event"`
|
|
|
+ InfoFormat int `bson:"infoformat"`
|
|
|
PendState int `bson:"pendstate"`
|
|
|
ModifyUser string `bson:"modifyuser"`
|
|
|
ModifyId string `bson:"modifyuserid"`
|
|
@@ -52,6 +54,11 @@ type NewSpider struct {
|
|
|
Working int `bson:"working"`
|
|
|
AuditTime int64 `bson:"l_uploadtime"`
|
|
|
ListIsFilter bool `bson:"listisfilter"`
|
|
|
+ UpLimit int `bson:"uplimit"`
|
|
|
+ MaxPage int `bson:"maxpage"`
|
|
|
+ Page_FlipOk bool `bson:"page_flipok"`
|
|
|
+ Page_OneOk bool `bson:"page_oneok"`
|
|
|
+ Page_TwoOk bool `bson:"page_twook"`
|
|
|
CodeTags map[string]interface{} `bson:"codetags"`
|
|
|
//统计信息
|
|
|
Detail_DownloadNum int `bson:"detail_downloadnum"`
|
|
@@ -73,7 +80,7 @@ type NewSpider struct {
|
|
|
Comeintime int64 `bson:"comeintime"`
|
|
|
//异常汇总
|
|
|
//Error map[string]*ErrorInfo `json:"error"`
|
|
|
- ErrType int `bson:"errtype"` //记录权重最高的异常类型
|
|
|
+ ErrType string `bson:"errtype"` //记录权重最高的异常类型
|
|
|
ErrTypeMap map[int]bool `bson:"errtypemap"` //记录所有异常
|
|
|
ErrDescription string `bson:"errdescription"` //异常描述
|
|
|
}
|
|
@@ -115,7 +122,9 @@ func getCodeBaseInfo() {
|
|
|
},
|
|
|
//lua正在被维护的爬虫和上架爬虫
|
|
|
map[string]interface{}{
|
|
|
- "platform": "golua平台",
|
|
|
+ "platform": map[string]interface{}{
|
|
|
+ "$in": []string{"golua平台", "chrome"},
|
|
|
+ },
|
|
|
"state": map[string]interface{}{
|
|
|
"$in": []int{0, 1, 2}, //待完成、待审核、未通过
|
|
|
},
|
|
@@ -138,6 +147,8 @@ func getCodeBaseInfo() {
|
|
|
"l_uploadtime": 1,
|
|
|
"listisfilter": 1,
|
|
|
"codetags": 1,
|
|
|
+ "infoformat": 1,
|
|
|
+ "param_common": 1,
|
|
|
}
|
|
|
it := sess.DB(util.MgoEB.DbName).C("luaconfig").Find(&query).Select(&fields).Iter()
|
|
|
n := 0
|
|
@@ -152,9 +163,15 @@ func getCodeBaseInfo() {
|
|
|
sp := &NewSpider{
|
|
|
WarnInfoMap: map[int]*WarnInfo{},
|
|
|
//Error: map[string]*ErrorInfo{},
|
|
|
- ErrType: -1,
|
|
|
- ErrTypeMap: map[int]bool{},
|
|
|
+ ErrType: "-1",
|
|
|
+ ErrTypeMap: map[int]bool{},
|
|
|
+ Page_FlipOk: true,
|
|
|
+ Page_OneOk: true,
|
|
|
+ Page_TwoOk: true,
|
|
|
}
|
|
|
+ param_common := tmp["param_common"].([]interface{})
|
|
|
+ maxPage := qu.IntAll(param_common[5])
|
|
|
+ delete(tmp, "param_common")
|
|
|
luaByte, _ := bson.Marshal(tmp)
|
|
|
if bson.Unmarshal(luaByte, &sp) != nil {
|
|
|
qu.Info("初始化爬虫失败:", tmp["_id"])
|
|
@@ -162,6 +179,7 @@ func getCodeBaseInfo() {
|
|
|
}
|
|
|
sp.Working = util.CodeEventWorking[sp.Working]
|
|
|
sp.Model = util.CodeEventModel[sp.Event]
|
|
|
+ sp.MaxPage = maxPage
|
|
|
if sp.Platform == "python" {
|
|
|
sp.Model = 1
|
|
|
}
|
|
@@ -552,10 +570,13 @@ func getSpiderDownloadRateData() {
|
|
|
//},
|
|
|
}
|
|
|
fields := map[string]interface{}{
|
|
|
- "spidercode": 1,
|
|
|
- "alltimes": 1,
|
|
|
- "zero": 1,
|
|
|
- "oh_percent": 1,
|
|
|
+ "spidercode": 1,
|
|
|
+ "alltimes": 1,
|
|
|
+ "zero": 1,
|
|
|
+ "oh_percent": 1,
|
|
|
+ "uplimit": 1,
|
|
|
+ "page_fail": 1,
|
|
|
+ "page_onefail": 1,
|
|
|
}
|
|
|
it := sess.DB(util.MgoS.DbName).C("spider_downloadrate").Find(&query).Select(&fields).Iter()
|
|
|
n := 0
|
|
@@ -571,11 +592,18 @@ func getSpiderDownloadRateData() {
|
|
|
alltimes := qu.IntAll(tmp["alltimes"])
|
|
|
zero := qu.IntAll(tmp["zero"])
|
|
|
oh_percent := qu.IntAll(tmp["oh_percent"])
|
|
|
+ uplimit := qu.IntAll(tmp["uplimit"])
|
|
|
+ page_fail := qu.IntAll(tmp["page_fail"])
|
|
|
+ page_onefail := qu.IntAll(tmp["page_onefail"])
|
|
|
lock.Lock()
|
|
|
if sp := NewCodeInfoMap[code]; sp != nil {
|
|
|
sp.List_NoDataTimes = zero
|
|
|
sp.List_RunTimes = alltimes
|
|
|
sp.List_AllInTimes = oh_percent
|
|
|
+ sp.Page_FlipOk = !(uplimit > 0)
|
|
|
+ sp.UpLimit = uplimit
|
|
|
+ sp.Page_OneOk = !(page_onefail == alltimes && page_onefail > 0)
|
|
|
+ sp.Page_TwoOk = !(page_fail == alltimes && page_fail > 0)
|
|
|
}
|
|
|
lock.Unlock()
|
|
|
}(tmp)
|
|
@@ -626,7 +654,7 @@ func saveCodeInfo() {
|
|
|
codeInfoArr = []map[string]interface{}{}
|
|
|
}
|
|
|
if len(taskArr) > 500 {
|
|
|
- util.MgoEB.UpSertBulk("newtask", taskArr...)
|
|
|
+ util.MgoEB.UpSertBulk("task", taskArr...)
|
|
|
taskArr = [][]map[string]interface{}{}
|
|
|
}
|
|
|
}(spider)
|
|
@@ -637,7 +665,7 @@ func saveCodeInfo() {
|
|
|
codeInfoArr = []map[string]interface{}{}
|
|
|
}
|
|
|
if len(taskArr) > 0 {
|
|
|
- util.MgoEB.UpSertBulk("newtask", taskArr...)
|
|
|
+ util.MgoEB.UpSertBulk("task", taskArr...)
|
|
|
taskArr = [][]map[string]interface{}{}
|
|
|
}
|
|
|
NewCodeInfoMap = map[string]*NewSpider{}
|
|
@@ -649,7 +677,10 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
|
|
|
if sp.Event == 7000 {
|
|
|
return
|
|
|
}
|
|
|
- if sp.ErrType == -1 { //无异常
|
|
|
+ if sp.ErrType == "-1" { //无异常
|
|
|
+ return
|
|
|
+ }
|
|
|
+ if !util.CreateTaskInfoFormat[sp.InfoFormat] { //非创建任务爬虫
|
|
|
return
|
|
|
}
|
|
|
//查询历史任务
|
|
@@ -660,12 +691,13 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
|
|
|
},
|
|
|
}
|
|
|
fields := map[string]interface{}{
|
|
|
- "i_state": 1,
|
|
|
- "s_type": 1,
|
|
|
- "s_descript": 1,
|
|
|
- "i_times": 1,
|
|
|
+ "i_state": 1,
|
|
|
+ "s_type": 1,
|
|
|
+ "s_descript": 1,
|
|
|
+ "i_times": 1,
|
|
|
+ "l_comeintime": 1,
|
|
|
}
|
|
|
- list, _ := util.MgoEB.Find("newtask", query, nil, fields, false, -1, -1)
|
|
|
+ list, _ := util.MgoEB.Find("task", query, nil, fields, false, -1, -1)
|
|
|
update := []map[string]interface{}{}
|
|
|
if list != nil && len(*list) > 0 { //已有任务
|
|
|
if len(*list) > 1 {
|
|
@@ -677,11 +709,12 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
|
|
|
})
|
|
|
return
|
|
|
}
|
|
|
- task := (*list)[0] //唯一任务
|
|
|
- state_old := qu.IntAll(task["i_state"]) //历史任务状态
|
|
|
- times_old := qu.IntAll(task["i_times"]) //历史任务次数
|
|
|
- type_old := qu.ObjToString(task["s_type"]) //历史任务异常类型
|
|
|
- descript_old := qu.ObjToString(task["s_descript"]) //历史任务描述
|
|
|
+ task := (*list)[0] //唯一任务
|
|
|
+ state_old := qu.IntAll(task["i_state"]) //历史任务状态
|
|
|
+ times_old := qu.IntAll(task["i_times"]) //历史任务次数
|
|
|
+ type_old := qu.ObjToString(task["s_type"]) //历史任务异常类型
|
|
|
+ descript_old := qu.ObjToString(task["s_descript"]) //历史任务描述
|
|
|
+ comeintime_old := qu.Int64All(task["l_comeintime"]) //历史任务创建时间
|
|
|
|
|
|
result := map[string]interface{}{
|
|
|
"i_event": sp.Event,
|
|
@@ -691,11 +724,19 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
|
|
|
}
|
|
|
//任务状态
|
|
|
if state_old == 0 {
|
|
|
- result["i_state"] = 1 //第二次任务,将历史待确认任务升级为待处理
|
|
|
+ if sp.ErrType == NEWTASK_LISTERR || sp.ErrType == NEWTASK_DATAINFOERR {
|
|
|
+ result["i_state"] = 1
|
|
|
+ } else if comeintime_old >= util.GetTime(-30) { //在一个月内有历史任务
|
|
|
+ result["i_state"] = 1
|
|
|
+ } else {
|
|
|
+ result["l_complete"] = util.CompleteTime("1")
|
|
|
+ result["l_comeintime"] = time.Now().Unix()
|
|
|
+ result["l_updatetime"] = time.Now().Unix()
|
|
|
+ }
|
|
|
}
|
|
|
//任务类型
|
|
|
- if sp.ErrType < qu.IntAll(type_old) { //取优先级高者
|
|
|
- result["s_type"] = fmt.Sprint(sp.ErrType)
|
|
|
+ if sp.ErrType < type_old { //取优先级高者
|
|
|
+ result["s_type"] = sp.ErrType
|
|
|
}
|
|
|
update = append(update, map[string]interface{}{"_id": task["_id"]})
|
|
|
update = append(update, map[string]interface{}{"$set": result})
|
|
@@ -704,10 +745,10 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
|
|
|
lock.Unlock()
|
|
|
} else { //无历史任务
|
|
|
state_new := 0
|
|
|
- if sp.ErrType == 1 && sp.Channel_Status != 200 { //列表页异常任务,栏目响应状态异常者,直接建待处理任务
|
|
|
- state_new = 1
|
|
|
- }
|
|
|
- if sp.ErrType == 2 { //数据异常错误类型,任务状态1
|
|
|
+ //if sp.ErrType == 1 && sp.Channel_Status != 200 { //列表页异常任务,栏目响应状态异常者,直接建待处理任务
|
|
|
+ // state_new = 1
|
|
|
+ //}
|
|
|
+ if sp.ErrType == NEWTASK_LISTERR || sp.ErrType == NEWTASK_DATAINFOERR {
|
|
|
state_new = 1
|
|
|
}
|
|
|
saveMap := map[string]interface{}{
|
|
@@ -719,7 +760,7 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
|
|
|
"i_event": sp.Event,
|
|
|
"i_state": state_new,
|
|
|
"s_source": "程序",
|
|
|
- "s_type": fmt.Sprint(sp.ErrType),
|
|
|
+ "s_type": sp.ErrType,
|
|
|
"s_descript": sp.ErrDescription,
|
|
|
"i_times": 1,
|
|
|
"l_comeintime": time.Now().Unix(),
|
|
@@ -738,6 +779,7 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
|
|
|
func getAllErr(sp *NewSpider) {
|
|
|
listErr(sp) //列表页异常
|
|
|
dataInfoErr(sp) //数据异常错误
|
|
|
+ pageFlipErr(sp) //爬虫翻页异常
|
|
|
downloadRateErr(sp) //下载频率异常
|
|
|
downloadFailedErr(sp) //下载异常
|
|
|
dataInfoWarn(sp) //数据异常警告
|
|
@@ -764,7 +806,7 @@ func listErr(sp *NewSpider) {
|
|
|
//sp.Error[NEWTASK_LISTERR] = &ErrorInfo{
|
|
|
// ErrInfo: map[string]bool{LuaErrTypeInfo[NEWTASK_LISTERR]: true},
|
|
|
//}
|
|
|
- sp.ErrType = qu.IntAll(NEWTASK_LISTERR)
|
|
|
+ sp.ErrType = NEWTASK_LISTERR
|
|
|
sp.ErrTypeMap[qu.IntAll(NEWTASK_LISTERR)] = true
|
|
|
heartTime := ""
|
|
|
if sp.HeartTime != 0 {
|
|
@@ -806,23 +848,65 @@ func dataInfoErr(sp *NewSpider) {
|
|
|
//}
|
|
|
sp.ErrDescription += "数据异常错误:\n" + resultDescription
|
|
|
sp.ErrTypeMap[qu.IntAll(NEWTASK_DATAINFOERR)] = true
|
|
|
- if sp.ErrType < 0 {
|
|
|
- sp.ErrType = qu.IntAll(NEWTASK_DATAINFOERR)
|
|
|
+ if sp.ErrType < "0" {
|
|
|
+ sp.ErrType = NEWTASK_DATAINFOERR
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
}
|
|
|
-func downloadRateErr(sp *NewSpider) {
|
|
|
+
|
|
|
+func pageFlipErr(sp *NewSpider) { // pageFlipErr records page-flip (pagination) anomalies on sp: it marks ErrTypeMap, appends to ErrDescription and sets ErrType when no error type is recorded yet
|
|
|
defer qu.Catch()
|
|
|
- if sp.Platform == "python" && !sp.Py_IsValid {
|
|
|
+ if sp.Platform == "python" { // python-platform spiders are not checked for page-flip anomalies
|
|
|
return
|
|
|
}
|
|
|
- if sp.List_AllInTimes > 0 {
|
|
|
- errFlag := false
|
|
|
- if sp.Model == 1 && sp.AuditTime > 24 { //分开采集,且爬虫审核时间超过24小时,记录异常
|
|
|
+ errFlag := false
|
|
|
+ if sp.CodeTags != nil {
|
|
|
+ tagTime, _ := sp.CodeTags[NEWTASK_PAGEFLIPERR].(int64) // a missing key or a non-int64 value leaves tagTime at 0
|
|
|
+ if tagTime == 0 { // no page-flip anomaly tag
|
|
|
+ errFlag = true
|
|
|
+ } else if tagTime > 0 && tagTime <= util.GetTime(-7) { // tag has expired (presumably older than 7 days — confirm util.GetTime semantics)
|
|
|
errFlag = true
|
|
|
- } else if sp.Event != 7410 { //顺序采集(7410节点不建采集频率异常任务)
|
|
|
+ }
|
|
|
+ } else { // no tags at all: page-flip anomalies are recorded
|
|
|
+ errFlag = true
|
|
|
+ }
|
|
|
+ if errFlag {
|
|
|
+ // 1. An unlimited-paging spider went past the maximum page limit while collecting the list page (100 pages for high-performance, 50 for queue)
|
|
|
+ if !sp.Page_FlipOk && sp.Model == 1 {
|
|
|
+ sp.ErrTypeMap[qu.IntAll(NEWTASK_PAGEFLIPERR)] = true
|
|
|
+ sp.ErrDescription += "爬虫翻页异常:\n 列表页共采集" + fmt.Sprint(sp.List_RunTimes) + "轮,其中有" + fmt.Sprint(sp.UpLimit) + "轮列表页采集翻页超过最大限制\n"
|
|
|
+ if sp.ErrType < "0" { // true only for the initial "-1" (no error recorded yet), which sorts before "0"
|
|
|
+ sp.ErrType = NEWTASK_PAGEFLIPERR
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 2. The first list page yielded no data while the second page did
|
|
|
+ if !sp.Page_OneOk {
|
|
|
+ sp.ErrTypeMap[qu.IntAll(NEWTASK_PAGEFLIPERR)] = true
|
|
|
+ sp.ErrDescription += "爬虫翻页异常:\n 列表页共采集" + fmt.Sprint(sp.List_RunTimes) + "轮,其中有" + fmt.Sprint(sp.List_RunTimes) + "轮爬虫未采集到第一页数据\n"
|
|
|
+ if sp.ErrType < "0" {
|
|
|
+ sp.ErrType = NEWTASK_PAGEFLIPERR
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 3. The first list page has data, but the second page has none or repeats the first page
|
|
|
+ if !sp.Page_TwoOk {
|
|
|
+ sp.ErrTypeMap[qu.IntAll(NEWTASK_PAGEFLIPERR)] = true
|
|
|
+ sp.ErrDescription += "爬虫翻页异常:\n 列表页共采集" + fmt.Sprint(sp.List_RunTimes) + "轮,其中有" + fmt.Sprint(sp.List_RunTimes) + "轮爬虫采集的第一、二页数据相同或未采集到第二页数据\n"
|
|
|
+ if sp.ErrType < "0" {
|
|
|
+ sp.ErrType = NEWTASK_PAGEFLIPERR
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func downloadRateErr(sp *NewSpider) {
|
|
|
+ defer qu.Catch()
|
|
|
+ if sp.Platform == "python" {
|
|
|
+ if !sp.Py_IsValid { //无效爬虫
|
|
|
+ return
|
|
|
+ } else {
|
|
|
+ errFlag := false
|
|
|
if sp.CodeTags != nil {
|
|
|
tagTime, _ := sp.CodeTags[NEWTASK_RATEERR].(int64)
|
|
|
if tagTime == 0 { //无频率异常标记
|
|
@@ -830,18 +914,43 @@ func downloadRateErr(sp *NewSpider) {
|
|
|
} else if tagTime > 0 && tagTime <= util.GetTime(-7) { //标记失效
|
|
|
errFlag = true
|
|
|
}
|
|
|
- } else { //无标记,记录列表页异常
|
|
|
+ } else { //无标记,记录采集频率异常
|
|
|
errFlag = true
|
|
|
}
|
|
|
+ if errFlag && sp.List_AllInTimes > 0 && sp.AuditTime > 24 {
|
|
|
+ sp.ErrTypeMap[qu.IntAll(NEWTASK_RATEERR)] = true
|
|
|
+ sp.ErrDescription += "采集频率异常:\n 列表页共采集" + fmt.Sprint(sp.List_RunTimes) + "轮,其中有" + fmt.Sprint(sp.List_AllInTimes) + "轮数据全采\n"
|
|
|
+ if sp.ErrType < "0" {
|
|
|
+ sp.ErrType = NEWTASK_RATEERR
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
- if errFlag {
|
|
|
- //sp.Error[NEWTASK_RATEERR] = &ErrorInfo{
|
|
|
- // ErrInfo: map[string]bool{LuaErrTypeInfo[NEWTASK_RATEERR]: true},
|
|
|
- //}
|
|
|
- sp.ErrTypeMap[qu.IntAll(NEWTASK_RATEERR)] = true
|
|
|
- sp.ErrDescription += "采集频率异常:\n 列表页共采集" + fmt.Sprint(sp.List_RunTimes) + "轮,其中有" + fmt.Sprint(sp.List_AllInTimes) + "轮数据全采\n"
|
|
|
- if sp.ErrType < 0 {
|
|
|
- sp.ErrType = qu.IntAll(NEWTASK_RATEERR)
|
|
|
+ } else { //lua
|
|
|
+ if sp.List_AllInTimes > 0 {
|
|
|
+ errFlag := false
|
|
|
+ if sp.Model == 1 && sp.AuditTime > 24 && (sp.MaxPage == 1 || sp.MaxPage > 100) { //分开采集,且爬虫审核时间超过24小时,记录异常
|
|
|
+ errFlag = true
|
|
|
+ } else if sp.Event != 7410 { //顺序采集(7410节点不建采集频率异常任务)
|
|
|
+ if sp.CodeTags != nil {
|
|
|
+ tagTime, _ := sp.CodeTags[NEWTASK_RATEERR].(int64)
|
|
|
+ if tagTime == 0 { //无频率异常标记
|
|
|
+ errFlag = true
|
|
|
+ } else if tagTime > 0 && tagTime <= util.GetTime(-7) { //标记失效
|
|
|
+ errFlag = true
|
|
|
+ }
|
|
|
+ } else { //无标记,记录采集频率异常
|
|
|
+ errFlag = true
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if errFlag {
|
|
|
+ //sp.Error[NEWTASK_RATEERR] = &ErrorInfo{
|
|
|
+ // ErrInfo: map[string]bool{LuaErrTypeInfo[NEWTASK_RATEERR]: true},
|
|
|
+ //}
|
|
|
+ sp.ErrTypeMap[qu.IntAll(NEWTASK_RATEERR)] = true
|
|
|
+ sp.ErrDescription += "采集频率异常:\n 列表页共采集" + fmt.Sprint(sp.List_RunTimes) + "轮,其中有" + fmt.Sprint(sp.List_AllInTimes) + "轮数据全采\n"
|
|
|
+ if sp.ErrType < "0" {
|
|
|
+ sp.ErrType = NEWTASK_RATEERR
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -925,8 +1034,8 @@ func downloadFailedErr(sp *NewSpider) {
|
|
|
}
|
|
|
sp.getErrHrefs("spider_highlistdata", NEWTASK_DOWNLOADERR, q)
|
|
|
sp.ErrTypeMap[qu.IntAll(NEWTASK_DOWNLOADERR)] = true
|
|
|
- if sp.ErrType < 0 {
|
|
|
- sp.ErrType = qu.IntAll(NEWTASK_DOWNLOADERR)
|
|
|
+ if sp.ErrType < "0" {
|
|
|
+ sp.ErrType = NEWTASK_DOWNLOADERR
|
|
|
}
|
|
|
}
|
|
|
} else { //顺序采集
|
|
@@ -943,8 +1052,8 @@ func downloadFailedErr(sp *NewSpider) {
|
|
|
count := sp.getErrHrefs("spider_listdata", NEWTASK_DOWNLOADERR, q)
|
|
|
if count > 0 {
|
|
|
sp.ErrTypeMap[qu.IntAll(NEWTASK_DOWNLOADERR)] = true
|
|
|
- if sp.ErrType < 0 {
|
|
|
- sp.ErrType = qu.IntAll(NEWTASK_DOWNLOADERR)
|
|
|
+ if sp.ErrType < "0" {
|
|
|
+ sp.ErrType = NEWTASK_DOWNLOADERR
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -980,8 +1089,8 @@ func dataInfoWarn(sp *NewSpider) {
|
|
|
//}
|
|
|
sp.ErrDescription += "数据异常警告:\n" + resultDescription
|
|
|
sp.ErrTypeMap[qu.IntAll(NEWTASK_DATAINFOWARN)] = true
|
|
|
- if sp.ErrType < 0 {
|
|
|
- sp.ErrType = qu.IntAll(NEWTASK_DATAINFOWARN)
|
|
|
+ if sp.ErrType < "0" {
|
|
|
+ sp.ErrType = NEWTASK_DATAINFOWARN
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -999,7 +1108,7 @@ func (sp *NewSpider) getErrHrefs(coll, errType string, query map[string]interfac
|
|
|
return
|
|
|
}
|
|
|
sp.ErrDescription += LuaErrTypeInfo[NEWTASK_DOWNLOADERR] + ":共下载" + fmt.Sprint(sp.Detail_DownloadNum) + "条,失败" + fmt.Sprint(sp.Detail_DownloadFailNum) + "条\n"
|
|
|
- if sp.Platform != "golua平台" {
|
|
|
+ if sp.Platform != "golua平台" && sp.Platform != "chrome" { // sample error hrefs only for golua/chrome spiders; the two inequalities must be joined with &&, since with || the guard is true for every platform and the lookup below never runs
|
|
|
return
|
|
|
}
|
|
|
list, _ := util.MgoS.Find(coll, query, nil, `{"href":1}`, false, 0, 3)
|
|
@@ -1044,7 +1153,7 @@ func closeTask() {
|
|
|
"l_closetime": time.Now().Unix(),
|
|
|
},
|
|
|
}
|
|
|
- util.MgoEB.Update("newtask", query, set, false, true)
|
|
|
+ util.MgoEB.Update("task", query, set, false, true)
|
|
|
}
|
|
|
|
|
|
/*
|