Răsfoiți Sursa

公共方法新增

maxiaoshan 2 ani în urmă
părinte
comite
e488ab548c
2 a modificat fișierele cu 184 adăugiri și 136 ștergeri
  1. 2 0
      src/spider/msgservice.go
  2. 182 136
      src/spider/script.go

+ 2 - 0
src/spider/msgservice.go

@@ -253,6 +253,8 @@ func SaveObj(event int, checkAtrr string, data map[string]interface{}, saveredis
 		href := fmt.Sprint(data["href"])
 		hashHref := util.HexText(href)
 		util.RedisClusterSet(hashHref, "", -1)
+		data["detail"] = ""      //字段太大
+		data["contenthtml"] = "" //字段太大
 		MgoS.Save("spider_filterdata", data)
 		//log.Println(event, checkAtrr, data["href"], data["title"], len(bs))
 		return

+ 182 - 136
src/spider/script.go

@@ -10,7 +10,6 @@ import (
 	codegrpc "analysiscode/client"
 	"bytes"
 	"compress/gzip"
-	"crypto/aes"
 	"encoding/base64"
 	"encoding/json"
 	gojs "gorunjs/client"
@@ -176,6 +175,130 @@ func (s *Script) LoadScript(site *string, code, script_file string, newstate boo
 		s.LastThreeTimes = append(s.LastThreeTimes, end)
 		return 3
 	}))
+	// downloadFile: Lua global that downloads an attachment and uploads it with util.UploadFile. Arguments as read from the stack: fileName(-6), url(-5), method(-4), param(-3), head(-2), cookie(-1). Pushes five strings: url, name, size, ftype, fid.
+	s.L.SetGlobal("downloadFile", s.L.NewFunction(func(S *lua.LState) int {
+		if s.FileLastThreeTimes == nil {
+			s.FileLastThreeTimes = make([]time.Duration, 4) // starts as four zero durations
+		}
+		if util.Config.IsDelay {
+			SleepTime(3, s.FileLastThreeTimes) // sleep before the request; the duration is decided by SleepTime
+		}
+		start := time.Now() // start time, used for the elapsed-time record at the end
+		cookie := S.ToString(-1)
+		head := S.ToTable(-2)
+		param := S.ToTable(-3)
+		method := S.ToString(-4)
+		url := S.ToString(-5)
+		fileName := S.ToString(-6)
+		ishttps := strings.Contains(url, "https") // NOTE(review): matches "https" anywhere in the URL, not only the scheme
+		// Detect an inline base64 image ("data:image") inside the URL.
+		base64UrlReg := regexp.MustCompile("data:image") // NOTE(review): compiled on every call
+		indexArr := base64UrlReg.FindStringIndex(url)
+		name, size, ftype, fid := "", "", "", ""
+		tmpUrl := ""
+		var ret []byte
+		var err error
+		var mycookie []*http.Cookie
+		if cookie != "{}" {
+			json.Unmarshal([]byte(cookie), &mycookie) // NOTE(review): unmarshal error is ignored
+		} else {
+			mycookie = make([]*http.Cookie, 0)
+		}
+
+		// Branch 1: the URL carries an inline base64 image.
+		if len(indexArr) == 2 { // example: http://www.mmjyjt.com/data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAqAAAAOwCAYAAAD
+			// Keep only the part of the URL starting at "data:image".
+			start := indexArr[0] // shadows the outer start (time.Time) within this branch only
+			url = url[start:]
+			fileName = "文件下载.jpg"
+			index := strings.Index(url, ",") // payload begins after the first comma; with no comma index is -1 and the whole string is decoded
+			dec := base64.NewDecoder(base64.StdEncoding, strings.NewReader(url[index+1:]))
+			ret, err = io.ReadAll(dec)
+			if err == nil && len(ret) > 0 {
+				url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, "", ret)
+			}
+		} else {
+			fileName = strings.TrimSpace(fileName)
+			url = strings.TrimSpace(url)
+			tmpUrl = url // remember the source URL; url is overwritten by UploadFile below
+			ret = DownloadFile(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
+			url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, url, ret)
+			if strings.TrimSpace(ftype) == "" { // no type reported: fall back to the extension of name
+				if len(path.Ext(name)) > 0 {
+					ftype = path.Ext(name)[1:] // drop the leading dot
+				}
+			}
+		}
+		// Site-specific filtering of abnormal attachments for the site named "中国招标投标公共服务平台".
+		if *site == "中国招标投标公共服务平台" {
+			if fid != "" && strings.Contains(fid, ErrFid) { // access-restricted attachment
+				size, ftype, fid = "", "", "" // clear the info; per the original note, AnalysisProjectInfo treats this as a failed download and downloads again
+			} else if bttype := qu.GetFileType(ret); bttype != "pdf" { // type derived from the byte stream is not pdf
+				logger.Info("Error File Type:", bttype, url)
+				size, ftype, fid = "", "", ""
+			}
+		} else if *site == "中国政府采购网" && tmpUrl != "" { // abnormal attachment size on this site, attributed by the original author to IP limiting
+			if size == "4.1 KB" || size == "4.2 KB" {
+				times := 1
+				for { // retry up to three times
+					if times > 3 {
+						break
+					}
+					// example page: http://www.ccgp.gov.cn/cggg/dfgg/jzxcs/202302/t20230210_19437644.htm
+					ret = DownloadFile(s.Downloader, tmpUrl, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
+					bs := bytes.NewReader(ret)
+					bsLen := qu.ConvertFileSize(bs.Len()) // presumably a human-readable size string for the byte count - confirm in qu
+					if bsLen != "4.1 KB" && bsLen != "4.2 KB" && bsLen != "0 B" {
+						url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, tmpUrl, ret)
+						break
+					}
+					times++
+				}
+				if size == "4.1 KB" || size == "4.2 KB" { // still abnormal after the retries
+					fid = ""
+					ftype = ""
+					name = ""
+				}
+			}
+		}
+		S.Push(lua.LString(url))
+		S.Push(lua.LString(name))
+		S.Push(lua.LString(size))
+		S.Push(lua.LString(ftype))
+		S.Push(lua.LString(fid))
+		atomic.AddInt32(&s.ToDayRequestNum, 1)
+		atomic.AddInt32(&s.TotalRequestNum, 1)
+
+		end := time.Since(start)
+		if len(s.FileLastThreeTimes) >= 4 { // drop the oldest entry so the slice stays at four entries after the append
+			s.FileLastThreeTimes = s.FileLastThreeTimes[1:]
+		}
+		s.FileLastThreeTimes = append(s.FileLastThreeTimes, end)
+		return 5
+	}))
+	// downloadBase64File: Lua global that decodes a base64 image and uploads it with util.UploadFile. Arguments as read from the stack: url(-3), fileName(-2), base64Img(-1). Pushes five strings: url, name, size, ftype, fid.
+	s.L.SetGlobal("downloadBase64File", s.L.NewFunction(func(S *lua.LState) int {
+		url := S.ToString(-3)
+		fileName := S.ToString(-2)
+		base64Img := S.ToString(-1)
+		if fileName == "" {
+			fileName = "文件下载"
+		}
+		fileName = fileName + ".jpg" // ".jpg" is always appended, whatever the image type
+		i := strings.Index(base64Img, ",") // payload begins after the first comma; with no comma i is -1 and the whole string is decoded
+		dec := base64.NewDecoder(base64.StdEncoding, strings.NewReader(base64Img[i+1:]))
+		ret, err := io.ReadAll(dec)
+		name, size, ftype, fid := "", "", "", ""
+		if err == nil && len(ret) > 0 { // on decode failure or empty data the input url and empty strings are returned
+			url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, url, ret)
+		}
+		S.Push(lua.LString(url))
+		S.Push(lua.LString(name))
+		S.Push(lua.LString(size))
+		S.Push(lua.LString(ftype))
+		S.Push(lua.LString(fid))
+		return 5
+	}))
 	//保存验证错误日志
 	s.L.SetGlobal("saveErrLog", s.L.NewFunction(func(S *lua.LState) int {
 		code := S.ToString(-4)
@@ -519,108 +642,6 @@ func (s *Script) LoadScript(site *string, code, script_file string, newstate boo
 		Msclient.Call("", msgid, mu.SERVICE_OFFICE_ANALYSIS, mu.SENDTO_TYPE_ALL_RECIVER, bs, 60)
 		return 1
 	}))
-
-	//下载附件download(url,method,param,head,cookie,fileName)
-	s.L.SetGlobal("downloadFile", s.L.NewFunction(func(S *lua.LState) int {
-		if s.FileLastThreeTimes == nil {
-			s.FileLastThreeTimes = make([]time.Duration, 4)
-		}
-		if util.Config.IsDelay {
-			SleepTime(3, s.FileLastThreeTimes) //睡眠时间
-		}
-		start := time.Now() //起始时间
-		cookie := S.ToString(-1)
-		head := S.ToTable(-2)
-		param := S.ToTable(-3)
-		method := S.ToString(-4)
-		url := S.ToString(-5)
-		fileName := S.ToString(-6)
-		ishttps := strings.Contains(url, "https")
-		//base64匹配
-		base64UrlReg := regexp.MustCompile("data:image")
-		indexArr := base64UrlReg.FindStringIndex(url)
-		name, size, ftype, fid := "", "", "", ""
-		tmpUrl := ""
-		var ret []byte
-		var err error
-		var mycookie []*http.Cookie
-		if cookie != "{}" {
-			json.Unmarshal([]byte(cookie), &mycookie)
-		} else {
-			mycookie = make([]*http.Cookie, 0)
-		}
-
-		//base64 url
-		if len(indexArr) == 2 { //base64 http://www.mmjyjt.com/data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAqAAAAOwCAYAAAD
-			//截取base64
-			start := indexArr[0]
-			url = url[start:]
-			fileName = "文件下载.jpg"
-			index := strings.Index(url, ",")
-			dec := base64.NewDecoder(base64.StdEncoding, strings.NewReader(url[index+1:]))
-			ret, err = io.ReadAll(dec)
-			if err == nil && len(ret) > 0 {
-				url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, "", ret)
-			}
-		} else {
-			fileName = strings.TrimSpace(fileName)
-			url = strings.TrimSpace(url)
-			tmpUrl = url
-			ret = DownloadFile(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
-			url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, url, ret)
-			if strings.TrimSpace(ftype) == "" {
-				if len(path.Ext(name)) > 0 {
-					ftype = path.Ext(name)[1:]
-				}
-			}
-		}
-		//特殊处理中国招标投标公共服务平台异常附件过滤
-		if *site == "中国招标投标公共服务平台" {
-			if fid != "" && strings.Contains(fid, ErrFid) { //限制访问的附件
-				size, ftype, fid = "", "", "" //信息置空,AnalysisProjectInfo方法将判断数据下载失败重新下载
-			} else if bttype := qu.GetFileType(ret); bttype != "pdf" { //由字节流解析的附件类型不是pdf
-				logger.Info("Error File Type:", bttype, url)
-				size, ftype, fid = "", "", ""
-			}
-		} else if *site == "中国政府采购网" && tmpUrl != "" { //中国政府采购网附件大小异常,限制IP所致
-			if size == "4.1 KB" || size == "4.2 KB" {
-				times := 1
-				for { //重试三次
-					if times > 3 {
-						break
-					}
-					//http://www.ccgp.gov.cn/cggg/dfgg/jzxcs/202302/t20230210_19437644.htm
-					ret = DownloadFile(s.Downloader, tmpUrl, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
-					bs := bytes.NewReader(ret)
-					bsLen := qu.ConvertFileSize(bs.Len())
-					if bsLen != "4.1 KB" && bsLen != "4.2 KB" && bsLen != "0 B" {
-						url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, tmpUrl, ret)
-						break
-					}
-					times++
-				}
-				if size == "4.1 KB" || size == "4.2 KB" { //重试后异常
-					fid = ""
-					ftype = ""
-					name = ""
-				}
-			}
-		}
-		S.Push(lua.LString(url))
-		S.Push(lua.LString(name))
-		S.Push(lua.LString(size))
-		S.Push(lua.LString(ftype))
-		S.Push(lua.LString(fid))
-		atomic.AddInt32(&s.ToDayRequestNum, 1)
-		atomic.AddInt32(&s.TotalRequestNum, 1)
-
-		end := time.Since(start)
-		if len(s.FileLastThreeTimes) >= 4 {
-			s.FileLastThreeTimes = s.FileLastThreeTimes[1:]
-		}
-		s.FileLastThreeTimes = append(s.FileLastThreeTimes, end)
-		return 5
-	}))
 	s.L.SetGlobal("clearMemoeryCache", s.L.NewFunction(func(S *lua.LState) int {
 		/*title := S.ToString(-1)
 		isExist, _ := redis.Exists("title_repeat_judgement", "title_repeat_"+title)
@@ -727,25 +748,49 @@ func (s *Script) LoadScript(site *string, code, script_file string, newstate boo
 		S.Push(lua.LBool(ok))
 		return 1
 	}))
+	// encodeBase64: Lua global that pushes the standard base64 encoding of its single string argument.
+	s.L.SetGlobal("encodeBase64", s.L.NewFunction(func(S *lua.LState) int {
+		text := S.ToString(-1)
+		base64Text := base64.StdEncoding.EncodeToString([]byte(text))
+		S.Push(lua.LString(base64Text))
+		return 1
+	}))
+	// decodeBase64: Lua global that pushes the standard base64 decoding of its single string argument, or "" when decoding fails.
+	s.L.SetGlobal("decodeBase64", s.L.NewFunction(func(S *lua.LState) int {
+		text := S.ToString(-1)
+		result := ""
+		byteText, err := base64.StdEncoding.DecodeString(text)
+		if err == nil { // invalid input leaves result empty
+			result = string(byteText)
+		}
+		S.Push(lua.LString(result))
+		return 1
+	}))
 	// aesEncryptECB: Lua global for AES ECB-mode encryption. Arguments as read from the stack: origData(-2), key(-1). Pushes the base64 encoding of the encrypted bytes.
 	s.L.SetGlobal("aesEncryptECB", s.L.NewFunction(func(S *lua.LState) int {
 		origData := S.ToString(-2)
 		key := S.ToString(-1)
 		bytekey := []byte(key)
 		byteorigData := []byte(origData)
-		cipher, _ := aes.NewCipher(generateKey([]byte(bytekey)))
-		length := (len(byteorigData) + aes.BlockSize) / aes.BlockSize
-		plain := make([]byte, length*aes.BlockSize)
-		copy(plain, byteorigData)
-		pad := byte(len(plain) - len(byteorigData))
-		for i := len(byteorigData); i < len(plain); i++ {
-			plain[i] = pad
-		}
-		encrypted := make([]byte, len(plain))
-		// encrypt block by block
-		for bs, be := 0, cipher.BlockSize(); bs <= len(byteorigData); bs, be = bs+cipher.BlockSize(), be+cipher.BlockSize() {
-			cipher.Encrypt(encrypted[bs:be], plain[bs:be])
-		}
+		encrypted := util.AesECBEncrypt(byteorigData, bytekey) // replaces the inline padding and per-block loop removed above
+		result := base64.StdEncoding.EncodeToString(encrypted)
+		S.Push(lua.LString(result))
+		return 1
+	}))
+	// aesDecryptECB: Lua global for AES ECB-mode decryption. Arguments as read from the stack: origData(-2, base64 text), key(-1). Pushes the value returned by util.AesECBDecrypter.
+	s.L.SetGlobal("aesDecryptECB", s.L.NewFunction(func(S *lua.LState) int {
+		origData := S.ToString(-2)
+		key := S.ToString(-1)
+		data, _ := base64.StdEncoding.DecodeString(origData) // NOTE(review): the base64 decode error is discarded
+		result := util.AesECBDecrypter(data, []byte(key))
+		S.Push(lua.LString(result))
+		return 1
+	}))
+	//des ecb模式加密
+	s.L.SetGlobal("desEncryptECB", s.L.NewFunction(func(S *lua.LState) int {
+		origData := S.ToString(-2)
+		key := S.ToString(-1)
+		encrypted := util.DesECBEncrypt([]byte(origData), []byte(key))
 		result := base64.StdEncoding.EncodeToString(encrypted)
 		S.Push(lua.LString(result))
 		return 1
@@ -754,8 +799,26 @@ func (s *Script) LoadScript(site *string, code, script_file string, newstate boo
 	s.L.SetGlobal("desDecryptECB", s.L.NewFunction(func(S *lua.LState) int { // arguments as read from the stack: origData(-2, base64 text), key(-1)
 		origData := S.ToString(-2)
 		key := S.ToString(-1)
-		b, _ := base64.StdEncoding.DecodeString(origData)
-		result := util.DesECBDecrypter(b, []byte(key))
+		data, _ := base64.StdEncoding.DecodeString(origData) // NOTE(review): the base64 decode error is discarded
+		result := util.DesECBDecrypter(data, []byte(key))
+		S.Push(lua.LString(result)) // pushes the value returned by util.DesECBDecrypter
+		return 1
+	}))
+	// rsaEncrypt: Lua global for RSA public-key encryption. Arguments as read from the stack: origData(-2), key(-1). Pushes the base64 encoding of the bytes returned by util.EncryptWithPublicKey.
+	s.L.SetGlobal("rsaEncrypt", s.L.NewFunction(func(S *lua.LState) int {
+		origData := S.ToString(-2)
+		key := S.ToString(-1)
+		encrypted := util.EncryptWithPublicKey([]byte(origData), []byte(key)) // key format expected by the helper is not visible here - presumably PEM, confirm in util
+		result := base64.StdEncoding.EncodeToString(encrypted)
+		S.Push(lua.LString(result))
+		return 1
+	}))
+	// rsaDecrypt: Lua global for RSA private-key decryption. Arguments as read from the stack: origData(-2, base64 text), key(-1). Pushes the value returned by util.DecryptWithPrivateKey.
+	s.L.SetGlobal("rsaDecrypt", s.L.NewFunction(func(S *lua.LState) int {
+		origData := S.ToString(-2)
+		key := S.ToString(-1)
+		data, _ := base64.StdEncoding.DecodeString(origData) // NOTE(review): the base64 decode error is discarded
+		result := util.DecryptWithPrivateKey(data, []byte(key))
 		S.Push(lua.LString(result))
 		return 1
 	}))
@@ -814,24 +877,6 @@ func (s *Script) LoadScript(site *string, code, script_file string, newstate boo
 		S.Push(lua.LString(result))
 		return 1
 	}))
-	//base64加密
-	s.L.SetGlobal("encodeBase64", s.L.NewFunction(func(S *lua.LState) int {
-		text := S.ToString(-1)
-		base64Text := base64.StdEncoding.EncodeToString([]byte(text))
-		S.Push(lua.LString(base64Text))
-		return 1
-	}))
-	//base64解密
-	s.L.SetGlobal("decodeBase64", s.L.NewFunction(func(S *lua.LState) int {
-		text := S.ToString(-1)
-		result := ""
-		byteText, err := base64.StdEncoding.DecodeString(text)
-		if err == nil {
-			result = string(byteText)
-		}
-		S.Push(lua.LString(result))
-		return 1
-	}))
 	//长度
 	s.L.SetGlobal("stringLen", s.L.NewFunction(func(S *lua.LState) int {
 		text := S.ToString(-1)
@@ -861,6 +906,7 @@ func (s *Script) LoadScript(site *string, code, script_file string, newstate boo
 		return 1
 	}))
 	//获取验证码
+	//获取验证码
 	s.L.SetGlobal("getCodeByPath", s.L.NewFunction(func(S *lua.LState) int {
 		cookie := S.ToString(-1)
 		head := S.ToTable(-2)