maxiaoshan 3 роки тому
батько
коміт
3da783f3d8
1 змінений файл: 5 додано та 4 видалено
  1. 5 4
      src/spider/spider.go

+ 5 - 4
src/spider/spider.go

@@ -981,9 +981,9 @@ func (s *Spider) DownloadListDetail() {
 	isEsRepeat := false                                                             //是否进行es判重
 	if delayDay := DelaySites[s.Name]; delayDay > 0 {
 		isEsRepeat = true
-		if delayDay <= util.Config.DayNum { //判断该爬虫是否属于要延迟采集的站点,数据延迟delayDay采集(由于7410、7500、7700为顺序采集,无法延时)
+		if delayDay <= util.Config.DayNum*24 { //判断该爬虫是否属于要延迟采集的站点,数据延迟delayDay小时采集(由于7410、7500、7700为顺序采集,无法延时)
 			//comeintimeQuery["$lte"] = GetTime(-delayDay + 1)
-			comeintimeQuery["$lte"] = time.Now().Unix() - int64(86400*delayDay)
+			comeintimeQuery["$lte"] = time.Now().Unix() - int64(3600*delayDay)
 		}
 	}
 	q := map[string]interface{}{
@@ -998,7 +998,7 @@ func (s *Spider) DownloadListDetail() {
 		"event":      0,
 	}
 	if !s.Stop { //在下载详情页时爬虫下架,此时不再存心跳信息
-		UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail") //记录modal=1下载数据心跳
+		UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail") //记录modal=1采集三级页心跳
 	}
 	list, _ := Mgo.Find("spider_highlistdata", q, o, f, false, 0, 100)
 	if list != nil && len(*list) > 0 {
@@ -1019,7 +1019,8 @@ func (s *Spider) DownloadListDetail() {
 				eTime := time.Now().Unix()
 				sTime := eTime - int64(7*86400)
 				esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + fmt.Sprint(sTime) + `","lte": "` + fmt.Sprint(eTime) + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
-				if Es.Count(EsIndex, EsType, esQuery) > 0 { //es中含本title数据,不再采集,更新list表数据状态
+				count := Es.Count(EsIndex, EsType, esQuery)
+				if count > 0 { //es中含本title数据,不再采集,更新list表数据状态
 					set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true, "updatetime": time.Now().Unix()}} //已存在state置为1
 					Mgo.Update("spider_highlistdata", query, set, false, false)
 					util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, href, 3600*24*365)