Browse Source

新增基于页面渲染的下载方法

maxiaoshan 3 years ago
parent
commit
ef3ce1a7bf
2 changed files with 10 additions and 1 deletion
  1. 3 0
      src/config.json
  2. 7 1
      src/spider/script.go

+ 3 - 0
src/config.json

@@ -38,6 +38,9 @@
     "redishosts": [],
     "fileServer": "http://test.qmx.top:9333",
     "jsvmurl": "http://127.0.0.1:8080/jsvm",
+    "renderaddr": "http://8.131.72.226:8998/render.json",
+    "proxyaddr": "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch",
+    "proxyauthor": "Basic amlhbnl1MDAxOjEyM3F3ZSFB",
     "es": {
         "addr": "http://192.168.3.206:9800",
         "pool": 15,

+ 7 - 1
src/spider/script.go

@@ -848,7 +848,13 @@ func (s *Script) LoadScript(code, script_file string, newstate bool) string {
 		S.Push(lua.LString(fid))
 		return 5
 	}))
-
+	// Register the Lua global "downloadByRender": takes a URL and returns the string produced by util.DownloadByRender (page-render based download).
+	s.L.SetGlobal("downloadByRender", s.L.NewFunction(func(S *lua.LState) int {
+		href := S.ToString(-1) // argument is read from the top of the Lua stack
+		contentHtml := util.DownloadByRender(href) // NOTE(review): no error is surfaced here; presumably failures come back as an empty string — confirm in util
+		S.Push(lua.LString(contentHtml))
+		return 1 // number of values handed back to the Lua caller
+	}))
 	return ""
 }
 func dealHref(pageListUrl, href string) string {