Kaynağa Gözat

下载信息补充infoformat字段

maxiaoshan 2 yıl önce
ebeveyn
işleme
d7d67babf6
2 değiştirilmiş dosya ile 17 ekleme ve 4 silme
  1. + 11 - 1
      src/spider/handler.go
  2. + 6 - 3
      src/spider/spider.go

+ 11 - 1
src/spider/handler.go

@@ -920,6 +920,8 @@ func CreateSpider(code, luafile string, newstate, thread bool) (*Spider, string)
 	spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
 	//新老爬虫
 	spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
+	//爬虫类型
+	spider.Infoformat = spider.GetIntVar("spiderInfoformat")
 	return spider, ""
 }
 
@@ -971,6 +973,8 @@ func UpdateSpider(spider *Spider, code, script string) {
 	spider.IsMustDownload = ts.GetBoolVar("spiderIsMustDownload")
 	//新老爬虫
 	spider.IsCompete = ts.GetBoolVar("spiderIsCompete")
+	//爬虫类型
+	spider.Infoformat = spider.GetIntVar("spiderInfoformat")
 }
 
 //排队模式生成爬虫
@@ -1031,6 +1035,8 @@ func NewSpider_New(code, luafile string, newstate bool) (*Spider, string) {
 	spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
 	//新老爬虫
 	spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
+	//爬虫类型
+	spider.Infoformat = spider.GetIntVar("spiderInfoformat")
 	return spider, ""
 }
 
@@ -1093,6 +1099,8 @@ func NewSpider(code, luafile string) (*Spider, string) {
 	spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
 	//新老爬虫
 	spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
+	//爬虫类型
+	spider.Infoformat = spider.GetIntVar("spiderInfoformat")
 	return spider, ""
 }
 
@@ -1147,6 +1155,8 @@ func NewSpiderForThread(code, luafile string) (*Spider, string) {
 	spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
 	//新老爬虫
 	spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
+	//爬虫类型
+	spider.Infoformat = spider.GetIntVar("spiderInfoformat")
 	return spider, ""
 }
 
@@ -1264,7 +1274,7 @@ func GetScriptByTmp(luaconfig map[string]interface{}) string {
 		} else { //专家模式
 			script_content = luaconfig["str_content"].(string)
 		}
-		script += fmt.Sprintf(util.Tmp_Other, luaconfig["spidertype"], luaconfig["spiderhistorymaxpage"], luaconfig["spidermovevent"], luaconfig["spidercompete"])
+		script += fmt.Sprintf(util.Tmp_Other, luaconfig["spidertype"], luaconfig["spiderhistorymaxpage"], luaconfig["spidermovevent"], luaconfig["spidercompete"], luaconfig["infoformat"])
 		script += ` 
 			` + script_time + `
 			` + script_list + `

+ 6 - 3
src/spider/spider.go

@@ -80,6 +80,7 @@ type Spider struct {
 	IsHistoricalMend bool //是否是历史补漏爬虫
 	IsMustDownload   bool //是否强制下载
 	IsCompete        bool //区分新老爬虫
+	Infoformat       int  //区分爬虫类型 1:招标;2:拟建/审批;3:产权
 	IsMainThread     bool //是否为主线程(多线程采集时区分是否为主线程)
 }
 
@@ -658,7 +659,8 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
 	//atomic.AddInt32(&s.TodayDowncount, 1)
 	//atomic.AddInt32(&s.TotalDowncount, 1)
 	data["spidercode"] = s.Code
-	data["iscompete"] = s.IsCompete //2021-11-01以后新增的爬虫不在展示原文链接(保存服务判断)
+	data["iscompete"] = s.IsCompete   //2021-11-01以后新增的爬虫不在展示原文链接(保存服务判断)
+	data["infoformat"] = s.Infoformat //爬虫类型
 	Store(s.StoreMode, s.StoreToMsgEvent, s.Collection, s.CoverAttr, data, true)
 }
 
@@ -995,7 +997,6 @@ func (s *Spider) DownloadDetail(reload bool, isHistory bool) {
 					}
 					delete(data, "exit")
 					delete(data, "checkpublishtime")
-					data["comeintime"] = time.Now().Unix()
 					//计数
 					//tmpsp1, b := Allspiders.Load(s.Code)
 					//if b {
@@ -1006,9 +1007,11 @@ func (s *Spider) DownloadDetail(reload bool, isHistory bool) {
 					//		atomic.AddInt32(&sp1.TotalDowncount, 1)
 					//	}
 					//}
+					data["comeintime"] = time.Now().Unix()
 					data["spidercode"] = s.Code
 					data["dataging"] = 0
-					data["iscompete"] = s.IsCompete //2021-11-01以后新增的爬虫不在展示原文链接(保存服务判断)
+					data["iscompete"] = s.IsCompete   //2021-11-01以后新增的爬虫不在展示原文链接(保存服务判断)
+					data["infoformat"] = s.Infoformat //爬虫类型
 					Store(s.StoreMode, s.StoreToMsgEvent, s.Collection, s.CoverAttr, data, true)
 					set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "updatetime": time.Now().Unix()}} //下载成功state置为1
 					update = append(update, query)