@@ -250,7 +250,9 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 		max = 100 // cap the maximum page at 100
 	}
 	for ; start <= max && !s.Stop; start++ {
-		UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "list") // record the list-page heartbeat for every node
+		if !s.Stop { // the spider was taken offline while detail pages were downloading; stop storing heartbeats from then on
+			UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "list") // record the list-page heartbeat for every node
+		}
 		//qu.Debug("duplicate pages:", repeatPageNum, " configured max page:", tmpMax, " final max page:", max, " current page:", start, " repeat count:", repeatPageTimes)
 		if start > tmpMax && isRunRepeatList && repeatPageTimes >= 5 { // more than 5 duplicate pages in a row, stop paging
 			break
@@ -527,7 +529,9 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
 		SaveHighListPageData(paramdata, href, num)
 		return
 	} else {
-		UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail") // record the third-level (detail) page heartbeat for modal=0 legacy-mode collection
+		if !s.Stop {
+			UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail") // record the third-level (detail) page heartbeat for modal=0 legacy-mode collection
+		}
 		isExist, _ := util.ExistRedis("title_repeat_judgement", 0, "url_repeat_"+href)
 		if isExist { // refresh the redis key's TTL
 			util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, href, 3600*24*30)
@@ -572,7 +576,9 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
 		if t1 > time.Now().Unix() { // guard against a publish time set in the future
 			data["publishtime"] = time.Now().Unix()
 		}
-		UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detailexcute") // record the data-collected heartbeat for modal=0 legacy-mode collection
+		if !s.Stop {
+			UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detailexcute") // record the data-collected heartbeat for modal=0 legacy-mode collection
+		}
 		delete(data, "state")
 		delete(data, "exit")
 		delete(data, "checkpublishtime")
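
All three hunks apply the same fix: once s.Stop is set the spider has been taken offline, so UpdateHeart should no longer be called, and each call is wrapped in an if !s.Stop guard. Below is a minimal, self-contained Go sketch of that pattern. The Spider fields and the UpdateHeart signature are inferred from the calls in the diff; the heartbeat helper and the stubbed UpdateHeart body are assumptions for illustration, not code from this repository.

// A minimal sketch, not part of this diff: the repeated "skip heartbeats once
// the spider is stopped" guard could live in one helper. The Spider fields and
// the UpdateHeart signature are reconstructed from the calls above; the helper
// name and the stub body are illustrative only.
package main

import "fmt"

// Spider mirrors only the fields the guarded heartbeat calls rely on.
type Spider struct {
	Name, Channel, Code, MUserName string
	Stop                           bool // set when the spider is taken offline
}

// UpdateHeart stands in for the project's heartbeat writer; the real function
// persists the heartbeat instead of printing it.
func UpdateHeart(name, channel, code, user, stage string) {
	fmt.Printf("heartbeat %s/%s/%s by %s, stage %q\n", name, channel, code, user, stage)
}

// heartbeat records a heartbeat for the given stage only while the spider is
// still running, so an offline spider no longer touches heartbeat storage.
func (s *Spider) heartbeat(stage string) {
	if s.Stop {
		return
	}
	UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, stage)
}

func main() {
	s := &Spider{Name: "demo", Channel: "news", Code: "a01", MUserName: "tester"}
	s.heartbeat("list") // recorded: the spider is still running
	s.Stop = true
	s.heartbeat("detail") // skipped: the spider has been taken offline
}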