|
@@ -604,7 +604,17 @@ func getSpiderDownloadRateData() {
|
|
|
sp.List_AllInTimes = oh_percent
|
|
|
sp.Page_FlipOk = !(uplimit > 0)
|
|
|
sp.UpLimit = uplimit
|
|
|
+ //判断第一页采集是否异常
|
|
|
sp.Page_OneOk = !(page_onefail == alltimes && page_onefail > 0)
|
|
|
+ if sp.Page_OneOk {
|
|
|
+ percent := float64(page_onefail) / float64(alltimes)
|
|
|
+ if page_onefail <= 5 && (percent > 0.75 && percent < 1) {
|
|
|
+ sp.Page_OneOk = false
|
|
|
+ } else if page_onefail > 5 && (percent > 0.8 && percent < 1) {
|
|
|
+ sp.Page_OneOk = false
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //判断第二页采集是否异常
|
|
|
sp.Page_TwoOk = !(page_fail == alltimes && page_fail > 0)
|
|
|
}
|
|
|
lock.Unlock()
|
|
@@ -871,7 +881,7 @@ func pageFlipErr(sp *NewSpider) {
|
|
|
tagTime, _ := sp.CodeTags[NEWTASK_PAGEFLIPERR].(int64)
|
|
|
if tagTime == 0 { //无翻页异常标记
|
|
|
errFlag = true
|
|
|
- } else if tagTime > 0 && tagTime <= util.GetTime(-7) { //标记失效
|
|
|
+ } else if tagTime > 0 && tagTime <= util.GetTime(-120) { //标记失效
|
|
|
errFlag = true
|
|
|
}
|
|
|
} else { //无标记,记录翻页异常
|
|
@@ -933,9 +943,11 @@ func downloadRateErr(sp *NewSpider) {
|
|
|
} else { //lua
|
|
|
if sp.List_AllInTimes > 0 {
|
|
|
errFlag := false
|
|
|
- if sp.Model == 1 && sp.AuditTime > 24 && (sp.MaxPage == 1 || sp.MaxPage > 100) { //分开采集,且爬虫审核时间超过24小时,记录异常
|
|
|
- errFlag = true
|
|
|
- } else if sp.Event != 7410 { //顺序采集(7410节点不建采集频率异常任务)
|
|
|
+ if sp.Model == 1 { //列表页、详情页分开采集模式
|
|
|
+ if sp.AuditTime > 24 && (sp.MaxPage == 1 || sp.MaxPage > 100) { //分开采集且不是无限翻页,爬虫审核时间超过24小时,记录异常
|
|
|
+ errFlag = true
|
|
|
+ }
|
|
|
+ } else if sp.Event != 7410 { //列表页、详情页顺序采集模式(排除7410节点)
|
|
|
if sp.CodeTags != nil {
|
|
|
tagTime, _ := sp.CodeTags[NEWTASK_RATEERR].(int64)
|
|
|
if tagTime == 0 { //无频率异常标记
|
|
@@ -1125,7 +1137,7 @@ func (sp *NewSpider) getErrHrefs(coll, errType string, query map[string]interfac
|
|
|
for _, l := range *list {
|
|
|
href := qu.ObjToString(l["href"])
|
|
|
//errHrefs = append(errHrefs, &ErrRemark{Href: href})
|
|
|
- sp.ErrDescription += " " + href + "\n"
|
|
|
+ sp.ErrDescription += href + "\n"
|
|
|
}
|
|
|
//sp.Error[errType] = &ErrorInfo{
|
|
|
// Num: sp.Detail_DownloadFailNum,
|