|
@@ -100,7 +100,8 @@ var (
|
|
RestrictAccessReg = regexp.MustCompile(`访问被拒绝`)
|
|
RestrictAccessReg = regexp.MustCompile(`访问被拒绝`)
|
|
AllThreadNum int64
|
|
AllThreadNum int64
|
|
ListAllThreadNum int64
|
|
ListAllThreadNum int64
|
|
- DelaySiteMap map[string]*DelaySite //延迟采集站点集合
|
|
|
|
|
|
+ DelaySiteMap map[string]*DelaySite // Set of sites whose collection is delayed; looked up by spider name (s.Name), with reads bracketed by DelaySiteLock.
|
|
|
|
+ DelaySiteLock *sync.Mutex // Held around DelaySiteMap lookups. NOTE(review): declared as a pointer with no initializer here, so it is nil until assigned — confirm it is set (e.g. to &sync.Mutex{}) before the first Lock call.
|
|
UpdataHeartCache = make(chan []map[string]interface{}, 1000) //更新爬虫心跳信息
|
|
UpdataHeartCache = make(chan []map[string]interface{}, 1000) //更新爬虫心跳信息
|
|
SPH = make(chan bool, 5)
|
|
SPH = make(chan bool, 5)
|
|
|
|
|
|
@@ -1196,7 +1197,10 @@ func (s *Spider) HistoricalMendDownloadDetailItem(p interface{}) {
|
|
//2、非7000(历史节点)的历史补漏,采完列表直接采详情,采完爬虫下架(当前无此爬虫)
|
|
//2、非7000(历史节点)的历史补漏,采完列表直接采详情,采完爬虫下架(当前无此爬虫)
|
|
id := ""
|
|
id := ""
|
|
isEsRepeat := false
|
|
isEsRepeat := false
|
|
- if delaySite := DelaySiteMap[s.Name]; delaySite != nil && delaySite.Compete {
|
|
|
|
|
|
+ DelaySiteLock.Lock()
|
|
|
|
+ delaySite := DelaySiteMap[s.Name]
|
|
|
|
+ DelaySiteLock.Unlock()
|
|
|
|
+ if delaySite != nil && delaySite.Compete {
|
|
title := qu.ObjToString(paramdata["title"])
|
|
title := qu.ObjToString(paramdata["title"])
|
|
eTime := time.Now().Unix()
|
|
eTime := time.Now().Unix()
|
|
sTime := eTime - int64(7*86400)
|
|
sTime := eTime - int64(7*86400)
|
|
@@ -1316,7 +1320,10 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
|
|
UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail", false) //记录modal=0老模式采集三级页心跳
|
|
UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail", false) //记录modal=0老模式采集三级页心跳
|
|
}
|
|
}
|
|
isEsRepeat := false
|
|
isEsRepeat := false
|
|
- if delaySite := DelaySiteMap[s.Name]; delaySite != nil && delaySite.Compete {
|
|
|
|
|
|
+ DelaySiteLock.Lock()
|
|
|
|
+ delaySite := DelaySiteMap[s.Name]
|
|
|
|
+ DelaySiteLock.Unlock()
|
|
|
|
+ if delaySite != nil && delaySite.Compete {
|
|
title := qu.ObjToString(paramdata["title"])
|
|
title := qu.ObjToString(paramdata["title"])
|
|
eTime := time.Now().Unix()
|
|
eTime := time.Now().Unix()
|
|
sTime := eTime - int64(7*86400)
|
|
sTime := eTime - int64(7*86400)
|
|
@@ -1547,7 +1554,10 @@ func (s *Spider) DownloadDetail(reload bool, isHistory bool) {
|
|
o := map[string]interface{}{"_id": -1}
|
|
o := map[string]interface{}{"_id": -1}
|
|
if !isHistory { //非历史数据下载,补充comeintime时间检索条件
|
|
if !isHistory { //非历史数据下载,补充comeintime时间检索条件
|
|
comeintimeQuery := map[string]interface{}{"$gte": GetTime(-util.Config.DayNum)} //采集一周内的数据,防止有数据一直采不下来,造成积累
|
|
comeintimeQuery := map[string]interface{}{"$gte": GetTime(-util.Config.DayNum)} //采集一周内的数据,防止有数据一直采不下来,造成积累
|
|
- if delaySite := DelaySiteMap[s.Name]; delaySite != nil {
|
|
|
|
|
|
+ DelaySiteLock.Lock()
|
|
|
|
+ delaySite := DelaySiteMap[s.Name]
|
|
|
|
+ DelaySiteLock.Unlock()
|
|
|
|
+ if delaySite != nil {
|
|
isEsRepeat = delaySite.Compete
|
|
isEsRepeat = delaySite.Compete
|
|
if delaySite.DelayTime <= util.Config.DayNum*24 { //判断该爬虫是否属于要延迟采集的站点,数据延迟delayDay小时采集(由于7410、7500、7700为顺序采集,无法延时)
|
|
if delaySite.DelayTime <= util.Config.DayNum*24 { //判断该爬虫是否属于要延迟采集的站点,数据延迟delayDay小时采集(由于7410、7500、7700为顺序采集,无法延时)
|
|
//comeintimeQuery["$lte"] = GetTime(-delayDay + 1)
|
|
//comeintimeQuery["$lte"] = GetTime(-delayDay + 1)
|