Browse Source

中国招标投标公共服务平台python服务撤销

mxs 1 năm trước
mục cha
commit
566787102a
3 tập tin đã thay đổi với 20 bổ sung và 13 xóa
  1. 0 1
      src/config.json
  2. 18 10
      src/spider/script.go
  3. 2 2
      src/spider/spider.go

+ 0 - 1
src/config.json

@@ -53,7 +53,6 @@
     "renderaddr": "http://splash.spdata.jianyu360.com/render.json",
     "proxyaddr": "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch",
     "proxyauthor": "Basic amlhbnl1MDAxOjEyM3F3ZSFB",
-    "pwaddr": "http://172.17.145.179:8997/web/intercept/request?url=%s",
     "es": {
         "addr": "http://192.168.3.206:9800",
         "pool": 15,

+ 18 - 10
src/spider/script.go

@@ -608,6 +608,14 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 		util.TimeSleepFunc(time.Second*2, TimeSleepChan)
 		return 0
 	}))
+	s.L.SetGlobal("runSleep", s.L.NewFunction(func(S *lua.LState) int {
+		t := S.ToInt(-1)
+		if t <= 0 {
+			t = 1
+		}
+		time.Sleep(time.Duration(t) * time.Second)
+		return 0
+	}))
 	//编码解码
 	s.L.SetGlobal("transCode", s.L.NewFunction(func(S *lua.LState) int {
 		codeType := strings.ToLower(S.ToString(-2))
@@ -1234,16 +1242,16 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 		return 1
 	}))
 	//针对中国招标投标公共服务平台三级页瑞数加密下载方法
-	s.L.SetGlobal("downloadByDataIntercept", s.L.NewFunction(func(S *lua.LState) int {
-		url := S.ToString(-4)
-		url_regex := S.ToString(-3)
-		timeout := S.ToInt(-2)
-		proxy := S.ToBool(-1)
-		headers := util.DownloadByDataIntercept(url, url_regex, timeout, proxy)
-		table := util.MapToLuaTable(S, headers)
-		S.Push(table)
-		return 1
-	}))
+	//s.L.SetGlobal("downloadByDataIntercept", s.L.NewFunction(func(S *lua.LState) int {
+	//	url := S.ToString(-4)
+	//	url_regex := S.ToString(-3)
+	//	timeout := S.ToInt(-2)
+	//	proxy := S.ToBool(-1)
+	//	headers := util.DownloadByDataIntercept(url, url_regex, timeout, proxy)
+	//	table := util.MapToLuaTable(S, headers)
+	//	S.Push(table)
+	//	return 1
+	//}))
 	return ""
 }
 func dealHref(pageListUrl, href string) string {

+ 2 - 2
src/spider/spider.go

@@ -611,7 +611,7 @@ func (s *Spider) DownListPageItemBack() (errs interface{}) {
 	return errs
 }
 
-// 并发下载列表
+// DownListPageItemByThreads 并发下载列表(一次下载listthreadsnum页)
 func (s *Spider) DownListPageItemByThreads() (errs interface{}) {
 	defer qu.Catch()
 	start, max := s.GetIntVar("spiderStartPage"), s.GetIntVar("spiderMaxPage") //起始页、最大页
@@ -1341,7 +1341,7 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
 	//根据发布时间进行数据判重校验
 	tmphref := qu.ObjToString(data["href"])
 	publishtime := qu.Int64All(data["l_np_publishtime"])
-	//7410节点(变链接节点)或者一年前数据进行全量bloomredis href判重
+	//7410节点(变链接节点)或者一年前数据进行全量bloomredis 三级页href判重
 	if util.Config.Uploadevent == 7410 || publishtime < time.Now().AddDate(-1, 0, 0).Unix() {
 		isExist, _ = util.ExistsBloomRedis("href", tmphref)
 		if isExist {