|
@@ -549,9 +549,9 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
|
|
isEsRepeat := false
|
|
isEsRepeat := false
|
|
if delayDay := util.Config.DelaySites[s.Name]; delayDay > 0 { //类竞品站点爬虫title做es7天内判重检验(顺序采集无法延迟,只能判重)
|
|
if delayDay := util.Config.DelaySites[s.Name]; delayDay > 0 { //类竞品站点爬虫title做es7天内判重检验(顺序采集无法延迟,只能判重)
|
|
title := qu.ObjToString(paramdata["title"])
|
|
title := qu.ObjToString(paramdata["title"])
|
|
- eTime := fmt.Sprint(GetTime(0))
|
|
|
|
- sTime := fmt.Sprint(GetTime(-7))
|
|
|
|
- esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + sTime + `","lte": "` + eTime + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
|
|
|
|
|
|
+ eTime := time.Now().Unix()
|
|
|
|
+ sTime := eTime - int64(7*86400)
|
|
|
|
+ esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + fmt.Sprint(sTime) + `","lte": "` + fmt.Sprint(eTime) + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
|
|
if Es.Count(EsIndex, EsType, esQuery) > 0 { //es中含本title数据,不再采集,更新list表数据状态
|
|
if Es.Count(EsIndex, EsType, esQuery) > 0 { //es中含本title数据,不再采集,更新list表数据状态
|
|
isEsRepeat = true
|
|
isEsRepeat = true
|
|
}
|
|
}
|
|
@@ -774,9 +774,9 @@ func (s *Spider) DownloadHighDetail() {
|
|
href := qu.ObjToString(tmp["href"])
|
|
href := qu.ObjToString(tmp["href"])
|
|
if isEsRepeat { //es数据title判重
|
|
if isEsRepeat { //es数据title判重
|
|
title := qu.ObjToString(tmp["title"])
|
|
title := qu.ObjToString(tmp["title"])
|
|
- eTime := fmt.Sprint(GetTime(0))
|
|
|
|
- sTime := fmt.Sprint(GetTime(-7))
|
|
|
|
- esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + sTime + `","lte": "` + eTime + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
|
|
|
|
|
|
+ eTime := time.Now().Unix()
|
|
|
|
+ sTime := eTime - int64(7*86400)
|
|
|
|
+ esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + fmt.Sprint(sTime) + `","lte": "` + fmt.Sprint(eTime) + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
|
|
count := Es.Count(EsIndex, EsType, esQuery)
|
|
count := Es.Count(EsIndex, EsType, esQuery)
|
|
if count > 0 { //es中含本title数据,不再采集,更新list表数据状态
|
|
if count > 0 { //es中含本title数据,不再采集,更新list表数据状态
|
|
set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true}} //已存在state置为1
|
|
set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true}} //已存在state置为1
|
|
@@ -910,9 +910,9 @@ func (s *Spider) DownloadListDetail() {
|
|
href := qu.ObjToString(tmp["href"])
|
|
href := qu.ObjToString(tmp["href"])
|
|
if isEsRepeat { //es数据title判重
|
|
if isEsRepeat { //es数据title判重
|
|
title := qu.ObjToString(tmp["title"])
|
|
title := qu.ObjToString(tmp["title"])
|
|
- eTime := fmt.Sprint(GetTime(0))
|
|
|
|
- sTime := fmt.Sprint(GetTime(-7))
|
|
|
|
- esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + sTime + `","lte": "` + eTime + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
|
|
|
|
|
|
+ eTime := time.Now().Unix()
|
|
|
|
+ sTime := eTime - int64(7*86400)
|
|
|
|
+ esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + fmt.Sprint(sTime) + `","lte": "` + fmt.Sprint(eTime) + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
|
|
if Es.Count(EsIndex, EsType, esQuery) > 0 { //es中含本title数据,不再采集,更新list表数据状态
|
|
if Es.Count(EsIndex, EsType, esQuery) > 0 { //es中含本title数据,不再采集,更新list表数据状态
|
|
set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true}} //已存在state置为1
|
|
set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true}} //已存在state置为1
|
|
Mgo.Update("spider_highlistdata", query, set, false, false)
|
|
Mgo.Update("spider_highlistdata", query, set, false, false)
|