Prechádzať zdrojové kódy

chromedp测试方法

maxiaoshan pred 2 rokmi
rodič
commit
329de18c78
3 zmenené súbory: 73 pridaní a 49 odobraní
  1. 68 48
      src/spider/script.go
  2. 1 1
      src/taskManager/taskManager.go
  3. 4 0
      src/util/util.go

+ 68 - 48
src/spider/script.go

@@ -25,7 +25,7 @@ import (
 	"path"
 	qu "qfw/util"
 	"regexp"
-	util "spiderutil"
+	sp "spiderutil"
 	"strconv"
 	"strings"
 	"time"
@@ -84,7 +84,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		if charset == "" {
 			charset = s.Encoding
 		}
-		ret := Download(downloadnode, s.Downloader, url, "get", util.GetTable(head), charset, false, ishttps, "", s.Timeout)
+		ret := Download(downloadnode, s.Downloader, url, "get", sp.GetTable(head), charset, false, ishttps, "", s.Timeout)
 		S.Push(lua.LString(ret))
 		s.Test_luareqcount++
 		return 1
@@ -92,7 +92,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 	s.L.SetGlobal("findContentText", s.L.NewFunction(func(S *lua.LState) int {
 		gpath := S.ToString(-2)
 		content := S.ToString(-1)
-		ret := util.FindContentText(gpath, content)
+		ret := sp.FindContentText(gpath, content)
 		S.Push(ret)
 		return 1
 	}))
@@ -115,14 +115,14 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		var headers = map[string]interface{}{}
 		if param == nil {
 			ptext := map[string]interface{}{"text": S.ToString(-3)}
-			ret, retcookie, headers = DownloadAdv(downloadnode, s.Downloader, url, method, ptext, util.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
+			ret, retcookie, headers = DownloadAdv(downloadnode, s.Downloader, url, method, ptext, sp.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
 		} else {
-			ret, retcookie, headers = DownloadAdv(downloadnode, s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
+			ret, retcookie, headers = DownloadAdv(downloadnode, s.Downloader, url, method, sp.GetTable(param), sp.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
 		}
 		S.Push(lua.LString(ret))
 		scookie, _ := json.Marshal(retcookie)
 		S.Push(lua.LString(scookie))
-		hTable := util.MapToLuaTable(S, headers)
+		hTable := sp.MapToLuaTable(S, headers)
 		S.Push(hTable)
 		s.Test_luareqcount++
 		return 3
@@ -145,14 +145,14 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		var headers = map[string]interface{}{}
 		if param == nil {
 			ptext := map[string]interface{}{"text": S.ToString(-3)}
-			ret, retcookie, headers = DownloadAdvNew(downloadnode, s.Downloader, url, method, ptext, util.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
+			ret, retcookie, headers = DownloadAdvNew(downloadnode, s.Downloader, url, method, ptext, sp.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
 		} else {
-			ret, retcookie, headers = DownloadAdvNew(downloadnode, s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
+			ret, retcookie, headers = DownloadAdvNew(downloadnode, s.Downloader, url, method, sp.GetTable(param), sp.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
 		}
 		S.Push(lua.LString(ret))
 		scookie, _ := json.Marshal(retcookie)
 		S.Push(lua.LString(scookie))
-		hTable := util.MapToLuaTable(S, headers)
+		hTable := sp.MapToLuaTable(S, headers)
 		S.Push(hTable)
 		s.Test_luareqcount++
 		return 3
@@ -161,7 +161,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		nodetype := S.ToString(-3)
 		gpath := S.ToString(-2)
 		content := S.ToString(-1)
-		ret := util.FindOneText(gpath, content, nodetype)
+		ret := sp.FindOneText(gpath, content, nodetype)
 		S.Push(ret)
 		return 1
 	}))
@@ -169,7 +169,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		nodetype := S.ToString(-3)
 		gpath := S.ToString(-2)
 		content := S.ToString(-1)
-		ret := util.FindOneHtml(gpath, content, nodetype)
+		ret := sp.FindOneHtml(gpath, content, nodetype)
 		S.Push(ret)
 		return 1
 	}))
@@ -177,7 +177,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		gpath := S.ToString(-2)
 		content := S.ToString(-1)
 		ret := s.L.NewTable()
-		util.FindListText(gpath, content, ret)
+		sp.FindListText(gpath, content, ret)
 		S.Push(ret)
 		return 1
 	}))
@@ -185,7 +185,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		gpath := S.ToString(-2)
 		content := S.ToString(-1)
 		ret := s.L.NewTable()
-		util.FindListHtml(gpath, content, ret)
+		sp.FindListHtml(gpath, content, ret)
 		S.Push(ret)
 		return 1
 	}))
@@ -197,14 +197,14 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		qu.Debug(s.SCode, index, table.Len())
 		if index == 1 {
 			if pageno == 1 { //第一页数据
-				oneMap := util.TableToMap(table)
+				oneMap := sp.TableToMap(table)
 				text, _ := json.Marshal(oneMap)
-				hashText := util.HexTextByte(text)
+				hashText := sp.HexTextByte(text)
 				qu.Debug("第一页:", hashText)
 			} else if pageno == 2 { //第一页数据
-				twoMap := util.TableToMap(table)
+				twoMap := sp.TableToMap(table)
 				text, _ := json.Marshal(twoMap)
-				hashText := util.HexTextByte(text)
+				hashText := sp.HexTextByte(text)
 				qu.Debug("第二页:", hashText)
 			}
 		}
@@ -214,7 +214,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		qmap := S.ToTable(-2)
 		content := S.ToString(-1)
 		ret := s.L.NewTable()
-		util.FindMap(qmap, content, ret)
+		sp.FindMap(qmap, content, ret)
 		S.Push(ret)
 		return 1
 	}))
@@ -226,7 +226,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 			ret.RawSet(lua.LString("val"), lua.LString(""))
 			ret.RawSet(lua.LString("err"), lua.LString("js is null"))
 		} else {
-			rep := util.JsVmPost(util.Config.JsVmUrl, js)
+			rep := sp.JsVmPost(sp.Config.JsVmUrl, js)
 			ret.RawSet(lua.LString("val"), lua.LString(qu.ObjToString(rep["val"])))
 			ret.RawSet(lua.LString("err"), lua.LString(qu.ObjToString(rep["err"])))
 		}
@@ -272,7 +272,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		case "urldecode_utf8":
 			str, _ = url.QueryUnescape(str)
 		case "decode64":
-			str = util.DecodeB64(str)
+			str = sp.DecodeB64(str)
 		case "encodemd5":
 			str = qu.GetMd5String(str)
 		case "htmldecode": //html实体码
@@ -349,7 +349,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 			dec := base64.NewDecoder(base64.StdEncoding, strings.NewReader(url[index+1:]))
 			ret, err := io.ReadAll(dec)
 			if err == nil && len(ret) > 0 {
-				url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, "", ret)
+				url, name, size, ftype, fid = sp.UploadFile(s.SCode, fileName, "", ret)
 			}
 		} else { //正常url
 			var mycookie []*http.Cookie
@@ -360,13 +360,13 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 			}
 			fileName = strings.TrimSpace(fileName)
 			url = strings.TrimSpace(url)
-			ret := DownloadFile(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, false, ishttps, "", s.Timeout)
+			ret := DownloadFile(s.Downloader, url, method, sp.GetTable(param), sp.GetTable(head), mycookie, s.Encoding, false, ishttps, "", s.Timeout)
 			qu.Debug(GarbledCodeReg.FindAllString(string(ret), -1), len(ret))
 			if ret == nil || len(ret) < 1024*3 {
 				qu.Debug("下载文件出错!")
 			} else {
 				ftype = qu.GetFileType(ret)
-				url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, url, ret)
+				url, name, size, ftype, fid = sp.UploadFile(s.SCode, fileName, url, ret)
 				if (ftype == "docx" || ftype == "doc") && len(GarbledCodeReg.FindAllString(string(ret), -1)) > 10 {
 					name = "附件中含有乱码"
 				}
@@ -397,7 +397,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		ret, err := io.ReadAll(dec)
 		name, size, ftype, fid := "", "", "", ""
 		if err == nil && len(ret) > 0 {
-			url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, url, ret)
+			url, name, size, ftype, fid = sp.UploadFile(s.SCode, fileName, url, ret)
 		}
 		S.Push(lua.LString(url))
 		S.Push(lua.LString(name))
@@ -433,10 +433,10 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 	}))
 	//标题的关键词、排除词过滤
 	s.L.SetGlobal("pagefilterword", s.L.NewFunction(func(S *lua.LState) int {
-		keyWordReg := regexp.MustCompile(util.Config.Word["keyword"])
-		notKeyWordReg := regexp.MustCompile(util.Config.Word["notkeyword"])
+		keyWordReg := regexp.MustCompile(sp.Config.Word["keyword"])
+		notKeyWordReg := regexp.MustCompile(sp.Config.Word["notkeyword"])
 		data := S.ToTable(-1)
-		dataMap := util.TableToMap(data)
+		dataMap := sp.TableToMap(data)
 		ret := s.L.NewTable()
 		num := 1
 		for _, v := range dataMap {
@@ -448,7 +448,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 				}
 			}
 			if isOk {
-				ret.Insert(num, util.MapToLuaTable(S, tmp))
+				ret.Insert(num, sp.MapToLuaTable(S, tmp))
 				num++
 			}
 		}
@@ -457,10 +457,10 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 	}))
 	//标题的关键词、排除词过滤
 	s.L.SetGlobal("detailfilterword", s.L.NewFunction(func(S *lua.LState) int {
-		keyWordReg := regexp.MustCompile(util.Config.Word["keyword"])
-		notKeyWordReg := regexp.MustCompile(util.Config.Word["notkeyword"])
+		keyWordReg := regexp.MustCompile(sp.Config.Word["keyword"])
+		notKeyWordReg := regexp.MustCompile(sp.Config.Word["notkeyword"])
 		data := S.ToTable(-1)
-		dataMap := util.TableToMap(data)
+		dataMap := sp.TableToMap(data)
 		if title := qu.ObjToString(dataMap["title"]); title != "" {
 			if keyWordReg.MatchString(title) && !notKeyWordReg.MatchString(title) {
 				S.Push(lua.LBool(true))
@@ -528,7 +528,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		key := S.ToString(-1)
 		bytekey := []byte(key)
 		byteorigData := []byte(origData)
-		encrypted := util.AesECBEncrypt(byteorigData, bytekey)
+		encrypted := sp.AesECBEncrypt(byteorigData, bytekey)
 		result := base64.StdEncoding.EncodeToString(encrypted)
 		S.Push(lua.LString(result))
 		return 1
@@ -538,7 +538,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		origData := S.ToString(-2)
 		key := S.ToString(-1)
 		data, _ := base64.StdEncoding.DecodeString(origData)
-		result := util.AesECBDecrypter(data, []byte(key))
+		result := sp.AesECBDecrypter(data, []byte(key))
 		S.Push(lua.LString(result))
 		return 1
 	}))
@@ -546,7 +546,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 	s.L.SetGlobal("desEncryptECB", s.L.NewFunction(func(S *lua.LState) int {
 		origData := S.ToString(-2)
 		key := S.ToString(-1)
-		encrypted := util.DesECBEncrypt([]byte(origData), []byte(key))
+		encrypted := sp.DesECBEncrypt([]byte(origData), []byte(key))
 		result := base64.StdEncoding.EncodeToString(encrypted)
 		S.Push(lua.LString(result))
 		return 1
@@ -556,7 +556,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		origData := S.ToString(-2)
 		key := S.ToString(-1)
 		data, _ := base64.StdEncoding.DecodeString(origData)
-		result := util.DesECBDecrypter(data, []byte(key))
+		result := sp.DesECBDecrypter(data, []byte(key))
 		S.Push(lua.LString(result))
 		return 1
 	}))
@@ -564,7 +564,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 	s.L.SetGlobal("rsaEncrypt", s.L.NewFunction(func(S *lua.LState) int {
 		origData := S.ToString(-2)
 		key := S.ToString(-1)
-		encrypted := util.EncryptWithPublicKey([]byte(origData), []byte(key))
+		encrypted := sp.EncryptWithPublicKey([]byte(origData), []byte(key))
 		result := base64.StdEncoding.EncodeToString(encrypted)
 		S.Push(lua.LString(result))
 		return 1
@@ -574,7 +574,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		origData := S.ToString(-2)
 		key := S.ToString(-1)
 		data, _ := base64.StdEncoding.DecodeString(origData)
-		result := util.DecryptWithPrivateKey(data, []byte(key))
+		result := sp.DecryptWithPrivateKey(data, []byte(key))
 		S.Push(lua.LString(result))
 		return 1
 	}))
@@ -582,7 +582,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 	s.L.SetGlobal("getPublishtime", s.L.NewFunction(func(S *lua.LState) int {
 		detail := S.ToString(-2)
 		contenthtml := S.ToString(-1)
-		publishtime := util.GetPublishtime([]string{contenthtml, detail})
+		publishtime := sp.GetPublishtime([]string{contenthtml, detail})
 		S.Push(lua.LString(publishtime))
 		return 1
 	}))
@@ -667,7 +667,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		head := S.ToTable(-2)
 		stype := S.ToString(-3)
 		path := S.ToString(-4)
-		headMap := util.GetTable(head)
+		headMap := sp.GetTable(head)
 		//qu.Debug("cookie----------", cookie)
 		//qu.Debug("headMap----------", headMap)
 		headJsonStr := ""
@@ -682,7 +682,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		S.Push(lua.LString(code))
 		respHeadMap := map[string]interface{}{}
 		json.Unmarshal([]byte(respHead), &respHeadMap)
-		hTable := util.MapToLuaTable(S, respHeadMap)
+		hTable := sp.MapToLuaTable(S, respHeadMap)
 		S.Push(hTable)
 		S.Push(lua.LString(respCookie))
 		return 3
@@ -711,7 +711,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		}
 		fileName = strings.TrimSpace(fileName)
 		url = strings.TrimSpace(url)
-		ret := NewDownloadFile(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, false, ishttps, "", s.Timeout, false)
+		ret := NewDownloadFile(s.Downloader, url, method, sp.GetTable(param), sp.GetTable(head), mycookie, s.Encoding, false, ishttps, "", s.Timeout, false)
 
 		name, size, ftype, fid := "", "", "", ""
 		qu.Debug(GarbledCodeReg.FindAllString(string(ret), -1), len(ret))
@@ -722,7 +722,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 			if (ftype == "docx" || ftype == "doc") && len(GarbledCodeReg.FindAllString(string(ret), -1)) > 10 {
 				url, name, size, ftype, fid = "附件中含有乱码", "附件中含有乱码", "", "", ""
 			} else {
-				url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, url, ret)
+				url, name, size, ftype, fid = sp.UploadFile(s.SCode, fileName, url, ret)
 			}
 		}
 		if strings.TrimSpace(ftype) == "" {
@@ -740,7 +740,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 	//渲染页面下载
 	s.L.SetGlobal("downloadByRender", s.L.NewFunction(func(S *lua.LState) int {
 		href := S.ToString(-1)
-		contentHtml := util.DownloadByRender(href)
+		contentHtml := sp.DownloadByRender(href)
 		S.Push(lua.LString(contentHtml))
 		return 1
 	}))
@@ -748,8 +748,8 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 	s.L.SetGlobal("getSimulateLoginInfo", s.L.NewFunction(func(S *lua.LState) int {
 		param := S.ToTable(-1)
 		header := S.ToTable(-2)
-		byteParam, _ := json.Marshal(util.TableToMap(param))
-		headerParam, _ := json.Marshal(util.TableToMap(header))
+		byteParam, _ := json.Marshal(sp.TableToMap(param))
+		headerParam, _ := json.Marshal(sp.TableToMap(header))
 		stype := S.ToString(-3) //login:登陆,get:获取cookie
 		qu.Debug(string(headerParam))
 		qu.Debug(string(byteParam))
@@ -764,8 +764,8 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		id := S.ToString(-3)
 		param := S.ToTable(-2)
 		head := S.ToTable(-1)
-		paramMap := util.GetTable(param)
-		headMap := util.GetTable(head)
+		paramMap := sp.GetTable(param)
+		headMap := sp.GetTable(head)
 		qu.Debug(paramMap)
 		qu.Debug(headMap)
 		paramMap = map[string]interface{}{
@@ -829,9 +829,29 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		S.Push(lua.LBool(result))
 		return 1
 	}))
+	//chromedp下载
+	s.L.SetGlobal("downloadByChrome", s.L.NewFunction(func(S *lua.LState) int {
+		//blink := S.ToString(-4)
+		timeout := S.ToInt64(-3)
+		isProxy := S.ToBool(-2)
+		taskStr := S.ToString(-1)
+		task := []map[string]interface{}{}
+		tabResult := S.NewTable()
+		if json.Unmarshal([]byte(taskStr), &task) == nil {
+			chromeCase := sp.GetChromedpCase(timeout, isProxy, task)
+			qu.Debug(timeout, isProxy, chromeCase)
+			result := sp.DownloadByChromedp(chromeCase)
+			for i, v := range result {
+				tabResult.Insert(i+1, lua.LString(v))
+			}
+			S.Push(tabResult)
+		} else {
+			S.Push(tabResult)
+		}
+		return 1
+	}))
 }
 
-//
 func (s *Script) Reload() {
 	s.L.Close()
 	site := ""

+ 1 - 1
src/taskManager/taskManager.go

@@ -322,7 +322,7 @@ func (t *TaskM) Mytask() {
 				//查询关联爬虫
 				lua, _ := u.MgoEB.FindOneByField("luaconfig", map[string]interface{}{"code": code + u.Bu, "state": 5}, map[string]interface{}{"relatecode": 1})
 				if len(*lua) > 0 {
-					relatecode = qu.ObjToString((*lua)["relatecode"])
+					relatecode = code + u.Bu
 				}
 				v["relatecode"] = relatecode
 				v["encode"] = util.Se.Encode2Hex(code)

+ 4 - 0
src/util/util.go

@@ -223,9 +223,13 @@ func SpiderPassCheckListAndDetail(result map[int64][]map[string]interface{}, dat
 			msgMap["三级页publishtime值类型异常"] = true
 		}
 		contenthtml := qu.ObjToString(data["contenthtml"])
+		contenthtml = "<br> <br> 首页 > 交易信息 > 工程建设 > 核准信息公告 <br> <br> 乾潭镇五金工业园区置信小微园边坡治理工程 <br> 2023-06-20 <br> <iframe frameborder=\\\"0\\\" src=\\\"https://ggzy.hzctc.hangzhou.gov.cn:20001/uservice/downloadandshow.aspx?dirtype=3&filepath=20230606044619e5805b.pdf\\\" ></iframe> <br> <br> <br> 相关附件: <ul > <li >【招标申请备案登记表】</li> </ul> <br> <br> 相关公告: <ul > <li >【招标公告】 乾潭镇五金工业园区置信小微园边坡治理工程【2023-06-20】 </li> </ul> "
 		if strings.Contains(contenthtml, "img") {
 			msgMap["contenthtml中含有img是否下载"] = true
 		}
+		if strings.Contains(contenthtml, "iframe") {
+			msgMap["contenthtml中含有iframe是否下载"] = true
+		}
 		detail := qu.ObjToString(data["detail"])
 		if TitleFilterReg2.MatchString(detail) {
 			msgMap["三级页正文提取异常"] = true