@@ -1,7 +1,7 @@
package main
import (
- codegrpc "analysiscode"
+ codegrpc "analysiscode/client"
"fmt"
"io/ioutil"
mgo "mongodb"
@@ -287,9 +287,9 @@ func SaveObj(event int, checkAtrr string, data map[string]interface{}, saveredis
data["biddingcoll"] = arr[1]
}
MgoS.Save("data_bak", data)
- if !isExists {
- util.RedisClusterSet(hashHref, "", -1)
- }
+ //if !isExists {
+	//	util.RedisClusterSet(hashHref, "", -1) //hrefs of abnormal data filtered out by the save service are no longer written to the full redis set; writing them would make the data impossible to re-collect later
+ //}
//id := MgoS.Save("data_bak", data)
////data the save service failed to receive is stored in data_bak; the resend program relies on it to ensure no data is lost
//if id != "" && !flag { //save service send succeeded
@@ -7,7 +7,7 @@ common LUA methods need to be extracted; the main script file loads the shared LUA file
package spider
"bytes"
"compress/gzip"
"crypto/aes"
@@ -68,7 +68,11 @@ var UpdataMgoCache = make(chan []map[string]interface{}, 1000) //update pending re-down…
var SP = make(chan bool, 5)
var TimeChan = make(chan bool, 1)
var Reg = regexp.MustCompile(`(http|https)://([\w]+\.)+[\w]+(/?)`)
-var DelaySites map[string]int //set of delayed-collection sites
+var DelaySiteMap map[string]*DelaySite //set of delayed-collection sites
+type DelaySite struct {
+ DelayTime int
+ Compete bool
+}
//high-performance mode: periodically collect third-level (detail) page info
func DetailData() {
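// Sketch (assumption): how DetailData or its scheduler might consult the new
// DelaySiteMap; sites absent from the map are collected without delay.
func delayFor(site string) (delaySeconds int, competing bool) {
	if ds := DelaySiteMap[site]; ds != nil {
		return ds.DelayTime, ds.Compete
	}
	return 0, false
}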
@@ -9,11 +9,15 @@ var ErrFid = "a6879f0a8570256aa21fb978e6dabb50429a30dfacff697cf0b898abbc5c262e"
//initialize the set of delayed-collection sites
func InitOther() {
defer qu.Catch()
- DelaySites = map[string]int{}
+ DelaySiteMap = map[string]*DelaySite{}
list, _ := MgoS.Find("spider_compete", nil, nil, nil, false, -1, -1)
for _, l := range *list {
site := qu.ObjToString(l["site"])
delayTime := qu.IntAll(l["delaytime"])
- DelaySites[site] = delayTime
+ compete, _ := l["compete"].(bool)
+ DelaySiteMap[site] = &DelaySite{
+ DelayTime: delayTime,
+ Compete: compete,
+ }
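// Implied spider_compete document shape (assumption, inferred from the reads
// above): { "site": "...", "delaytime": 300, "compete": true }.
// A missing or non-bool "compete" field defaults to false via the checked
// type assertion, so existing documents need no migration.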