소스 검색

新增spidercompete是否是新爬虫判断

maxiaoshan 3 년 전
부모
커밋
529fe9baf2
3개의 변경된 파일, 7개의 추가 그리고 4개의 삭제
  1. 3 3
      src/config.json
  2. 2 1
      src/spider/handler.go
  3. 2 0
      src/spider/spider.go

+ 3 - 3
src/config.json

@@ -11,13 +11,13 @@
     "tmpmgodb": "spider",
     "tmpmgocoll": "regatherdata",
     "spiderchan": 5,
-    "msgserveraddr": "127.0.0.1:801",
-    "msgserveraddrfile": "127.0.0.1:802",
+    "msgserveraddr": "spdata.jianyu360.com:801",
+    "msgserveraddrfile": "spdata.jianyu360.com:802",
     "fileServer": "http://test.qmx.top:9333",
     "jsvmurl": "http://127.0.0.1:8080/jsvm",
     "logLevel": 1,
     "testdir": "res/test/spider_test.lua",
-    "redisservers": "title_repeat_judgement=192.168.3.207:1479,title_repeat_fulljudgement=192.168.3.207:1579",
+    "redisservers": "title_repeat_judgement=192.168.3.207:1679,title_repeat_fulljudgement=192.168.3.207:1679",
     "word":{
     	"keyword":"(抽签|中标|招标|成交|合同|中标候选人|资格预审|拟建|邀请|询价|比选|议价|竞价|磋商|采购|招投标|答疑|变更公告|更正公告|竞争性谈判|竞谈|意见征询|澄清|单一来源|流标|废标|验收公告|中止|终止|违规|处罚|征集公告|开标结果|评审结果|监理|招租|租赁|评判结果|项目|遴选|补遗|竞标|征求意见|标段|定点结果|项目评审公示|采购项目违规|采购活动中违规|项目行政处罚|采购行政处罚|项目审批公示)",
     	"notkeyword":"(招聘|拍卖|出租|出让|使用权|资产)"

+ 2 - 1
src/spider/handler.go

@@ -61,7 +61,7 @@ func NewSpider(code, script string) *Spider {
 		spider.Timeout = int64(spiderTimeout)
 	}
 	spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
-
+	spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
 	//date := time.Unix(time.Now().Unix(), 0).Format(qu.Date_Short_Layout)
 	// tmp := GetDownloadLast(spider.Code, date) //
 	// if len(tmp) > 0 {
@@ -145,6 +145,7 @@ func GetScriptByTmp(luaconfig map[string]interface{}) string {
 		} else {
 			script_content = luaconfig["str_content"].(string)
 		}
+		script += fmt.Sprintf(util.Tmp_Other, luaconfig["spidertype"], luaconfig["spiderhistorymaxpage"], luaconfig["spidermovevent"], luaconfig["spidercompete"])
 		script += ` 
 			` + script_time + `
 			` + script_list + `

+ 2 - 0
src/spider/spider.go

@@ -59,6 +59,7 @@ type Spider struct {
 	// //历史补漏
 	// IsHistoricalMend bool //是否是历史补漏爬虫
 	// IsMustDownload   bool //是否强制下载
+	IsCompete bool //区分新老爬虫
 }
 
 var TimeChan = make(chan bool, 1)
@@ -182,6 +183,7 @@ func (s *Spider) DownloadDetailItem(paramdata, tmp map[string]interface{}) {
 			}
 			tmp["comeintime"] = time.Now().Unix()
 			delete(tmp, "state")
+			tmp["iscompete"] = s.IsCompete
 			saveFlag := Store(s.StoreMode, s.StoreToMsgEvent, s.Collection, s.CoverAttr, tmp, true)
 			if saveFlag { //修改state状态
 				data["state"] = 3