Bläddra i källkod

新增验证码解析方法

mxs 7 månader sedan
förälder
incheckning
5892b587a8
2 ändrade filer med 163 tillägg och 49 borttagningar
  1. 162 49
      backend/script/script.go
  2. 1 0
      qianlima.go

+ 162 - 49
backend/script/script.go

@@ -14,6 +14,7 @@ import (
 	"github.com/imroc/req/v3"
 	"github.com/yuin/gopher-lua"
 	"github.com/yuin/gopher-lua/parse"
+	"io/ioutil"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"net/http"
 	"net/url"
@@ -427,7 +428,7 @@ func (b *GLBrowser) DownloadFile(tabTitle, tabUrl string, timeout int64, selecto
 		act)
 }
 
-func (b *GLBrowser) AnalysisCode(path, stype, head, cookie string, proxy bool) (code string, rh http.Header, rc []*http.Cookie) {
+func (b *GLBrowser) AnalyzeCodeByPath(path, stype, head, cookie string, proxy bool) (code string, rh http.Header, rc []*http.Cookie) {
 	//先用免费,为识别再用收费
 	ok := false
 	code, rh, rc, _, ok = getCodeByFree(path, stype, head, cookie, proxy) //自己的服务
@@ -473,12 +474,12 @@ func getCodeByFree(path, stype, head, cookie string, proxy bool) (code string, r
 		//下载验证码图片
 		var err error
 		var resultByte []byte
-		address := be.Cfg.ServerCodeFreeAddressOcr
+		//address := be.Cfg.ServerCodeFreeAddressOcr
 		if stype == "-1" { //传base64的图片
 			resultByte = []byte(path)
 		} else {
 			if stype == "6001" { //计算类验证码解析接口地址
-				address = be.Cfg.ServerCodeFreeAddressArithmetic
+				//address = be.Cfg.ServerCodeFreeAddressArithmetic
 			}
 			getCodeResp, err = request.Get(path) //通过请求图片地址返回的byte
 			resultByte = getCodeResp.Bytes()
@@ -487,31 +488,43 @@ func getCodeByFree(path, stype, head, cookie string, proxy bool) (code string, r
 			qu.Debug("Get Code By Path Error: ", path, err)
 			continue
 		}
-		//解析验证码
-		codeResp, err := client.R().
-			SetHeader("accept", "application/json").
-			SetFileReader("file", "1", bytes.NewReader(resultByte)).
-			Post(address)
-		if err != nil {
-			qu.Debug("analysis code by path err: ", path, err)
-			continue
-		}
-		yzmResult := map[string]interface{}{}
-		json.Unmarshal(codeResp.Bytes(), &yzmResult)
-		qu.Debug(path, yzmResult)
-		result := yzmResult["r"].(map[string]interface{})
-		yzm := fmt.Sprint(result["code"])
-		if yzm != "" {
-			if stype == "6001" || len(yzm) >= 4 {
-				code = yzm //长度小于4的视为识别错误
-				if getCodeResp != nil {
-					respheader = getCodeResp.Header
-					respcookie = getCodeResp.Cookies()
-				}
-				ok = true
-				return
+		code, err = getCode(resultByte, stype, true)
+		if err == nil && code != "" {
+			if getCodeResp != nil {
+				respheader = getCodeResp.Header
+				respcookie = getCodeResp.Cookies()
 			}
+			ok = true
+			return
 		}
+		//解析验证码
+		//codeResp, err := client.R().
+		//	SetHeader("accept", "application/json").
+		//	SetFileReader("file", "1", bytes.NewReader(resultByte)).
+		//	Post(address)
+		//if err != nil {
+		//	qu.Debug("analysis code by path err: ", path, err)
+		//	continue
+		//}
+		//yzmResult := map[string]interface{}{}
+		//json.Unmarshal(codeResp.Bytes(), &yzmResult)
+		//qu.Debug(path, yzmResult)
+		//if err != nil || yzmResult == nil {
+		//	continue
+		//}
+		//result := yzmResult["r"].(map[string]interface{})
+		//yzm := fmt.Sprint(result["code"])
+		//if yzm != "" {
+		//	if stype == "6001" || len(yzm) >= 4 {
+		//		code = yzm //长度小于4的视为识别错误
+		//		if getCodeResp != nil {
+		//			respheader = getCodeResp.Header
+		//			respcookie = getCodeResp.Cookies()
+		//		}
+		//		ok = true
+		//		return
+		//	}
+		//}
 	}
 	return
 }
@@ -554,39 +567,127 @@ func getCodeByPay(path, stype, head, cookie string, proxy bool) (code string, re
 			qu.Debug("Get Code By Path Error: ", path, err)
 			continue
 		}
+		code, err = getCode(getCodeResp.Bytes(), stype, false)
+		if err == nil && code != "" {
+			respheader = getCodeResp.Header
+			respcookie = getCodeResp.Cookies()
+			return
+		}
 		//解析验证码
+		//data := map[string]string{
+		//	"grant_type":     "",
+		//	"username":       "jianyu001",
+		//	"password":       "123qwe!A",
+		//	"scope":          "",
+		//	"client_id":      "",
+		//	"client_secret ": "",
+		//}
+		//codeResp, err := client.R().
+		//	SetHeader("accept", "application/json").
+		//	SetFileReader("file", "1", bytes.NewReader(getCodeResp.Bytes())).
+		//	SetFormData(data).
+		//	Post(be.Cfg.ServerCodeAddress + stype)
+		//if err != nil {
+		//	qu.Debug("analysis code by path err: ", path, err)
+		//	continue
+		//}
+		//codeResult := map[string]interface{}{}
+		//json.Unmarshal(codeResp.Bytes(), &codeResult)
+		//qu.Debug("codeResult:", codeResult)
+		//qu.Debug("codeResult:", result)
+		//if err != nil || result == nil {
+		//	continue
+		//}
+		//if yzm, ok := result["r"].(map[string]interface{})["pic_str"].(string); ok && yzm != "" && len(yzm) >= 4 {
+		//	code = yzm
+		//	respheader = getCodeResp.Header
+		//	respcookie = getCodeResp.Cookies()
+		//	return
+		//}
+	}
+	return
+}
+// getCode posts the captcha image bytes b to a recognition service and
+// returns the recognised text.
+//
+// When free is true the in-house OCR endpoint is used (the arithmetic
+// endpoint when stype is "6001"); otherwise the paid endpoint is used, with
+// stype appended to its address and the configured credentials sent as form
+// data. An empty code with a nil err means the service answered but no
+// acceptable code was found in the response.
+func getCode(b []byte, stype string, free bool) (code string, err error) {
+	qu.Debug("验证码类型:", stype)
+	// Build the multipart upload request for the recognition service.
+	request := req.C().R().
+		SetHeader("accept", "application/json").
+		SetFileReader("file", "1", bytes.NewReader(b))
+	address := be.Cfg.ServerCodeFreeAddressOcr
+	if !free {
 		data := map[string]string{
 			"grant_type":     "",
-			"username":       "jianyu001",
-			"password":       "123qwe!A",
+			"username":       be.Cfg.Username,
+			"password":       be.Cfg.Password,
 			"scope":          "",
 			"client_id":      "",
 			"client_secret ": "",
 		}
-		codeResp, err := client.R().
-			SetHeader("accept", "application/json").
-			SetFileReader("file", "1", bytes.NewReader(getCodeResp.Bytes())).
-			SetFormData(data).
-			Post(be.Cfg.ServerCodeAddress + stype)
-		//SetFile("file", "C:/Users/topnet/Desktop/code.jpg").
-		// Post(spiderutil.Config.ServerCodeAddress)
-		if err != nil {
-			qu.Debug("analysis code by path err: ", path, err)
-			continue
-		}
-		codeResult := map[string]interface{}{}
-		json.Unmarshal(codeResp.Bytes(), &codeResult)
-		qu.Debug("codeResult:", codeResult)
-		if yzm, ok := codeResult["r"].(map[string]interface{})["pic_str"].(string); ok && yzm != "" && len(yzm) >= 4 {
-			code = yzm
-			respheader = getCodeResp.Header
-			respcookie = getCodeResp.Cookies()
-			return
+		request.SetFormData(data)
+		address = be.Cfg.ServerCodeAddress + stype
+	} else if stype == "6001" { // arithmetic-type captcha uses its own endpoint
+		address = be.Cfg.ServerCodeFreeAddressArithmetic
+	}
+	qu.Debug("address:", address)
+	var resp *req.Response
+	resp, err = request.Post(address)
+	if err != nil {
+		qu.Debug("analysis code by path err: ", err)
+		return
+	}
+	var result map[string]interface{}
+	err = json.Unmarshal(resp.Bytes(), &result)
+	qu.Debug("验证码解析结果:", result)
+	if err == nil && result != nil {
+		// Comma-ok assertion: a missing or non-object "r" yields a nil map
+		// (safe to read from) instead of panicking.
+		r, _ := result["r"].(map[string]interface{})
+		if free {
+			// Skip absent/null codes: fmt.Sprint(nil) is "<nil>", which would
+			// otherwise pass the length check and be returned as a code.
+			if raw, found := r["code"]; found && raw != nil {
+				codeTmp := fmt.Sprint(raw)
+				// Codes shorter than 4 characters are treated as
+				// misrecognised, except for arithmetic captchas.
+				if len(codeTmp) >= 4 || (stype == "6001" && codeTmp != "") {
+					qu.Debug("验证码解析结果:", codeTmp)
+					return codeTmp, nil
+				}
+			}
+		} else {
+			if codeTmp, ok := r["pic_str"].(string); ok && len(codeTmp) >= 4 {
+				return codeTmp, nil
+			}
 		}
 	}
 	return
 }
 
+// AnalyzeCodeScreenShot screenshots the element matched by selector in the
+// tab identified by tabTitle/tabUrl and sends the image to the captcha
+// recognition services, trying the free service first and the paid one only
+// if the free attempt fails or returns an empty code.
+//
+// selectorType picks how selector is interpreted (by id or by query); any
+// other value falls back to chromedp.ByQueryAll. stype is the captcha type
+// passed through to getCode. The screenshot is also written to "code.png"
+// as a best-effort debugging aid; a failure to write it is only logged.
+func (b *GLBrowser) AnalyzeCodeScreenShot(tabTitle, tabUrl, selector string, selectorType int, timeout int64, stype string) (code string, err error) {
+	ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
+	if err != nil {
+		return
+	}
+	// Choose the query option first so every branch, including the default,
+	// produces a non-nil action for chromedp.Run.
+	var opt chromedp.QueryOption = chromedp.ByQueryAll
+	switch selectorType {
+	case selector_type_id:
+		opt = chromedp.ByID
+	case selector_type_query:
+		opt = chromedp.ByQuery
+	}
+	var bt []byte
+	if err = chromedp.Run(ctx, chromedp.Screenshot(selector, &bt, opt)); err != nil {
+		// Without a screenshot there is nothing to recognise.
+		return
+	}
+	// Keep a copy on disk for debugging; use a separate error variable so a
+	// failed dump does not leak into the returned err.
+	if werr := ioutil.WriteFile("code.png", bt, 0644); werr != nil {
+		qu.Debug(werr)
+	}
+	code, err = getCode(bt, stype, true) // free service
+	if err != nil || code == "" {
+		code, err = getCode(bt, stype, false) // paid service
+	}
+	return
+}
+
 // BindLuaState
 func (b *GLBrowser) BindLuaState(s *lua.LState, recordId string) {
 	//执行暂停
@@ -755,7 +856,7 @@ func (b *GLBrowser) BindLuaState(s *lua.LState, recordId string) {
 		}
 		return 1
 	}))
-	s.SetGlobal("browser_analysiscode", s.NewFunction(func(S *lua.LState) int {
+	s.SetGlobal("browser_analyzecode_bypath", s.NewFunction(func(S *lua.LState) int {
 		cookie := S.ToString(-1)
 		head := S.ToTable(-2)
 		stype := S.ToString(-3)
@@ -769,7 +870,7 @@ func (b *GLBrowser) BindLuaState(s *lua.LState, recordId string) {
 		if err == nil {
 			headJsonStr = string(headByte)
 		}
-		code, respHead, respCookie := b.AnalysisCode(path, stype, headJsonStr, cookie, proxy)
+		code, respHead, respCookie := b.AnalyzeCodeByPath(path, stype, headJsonStr, cookie, proxy)
 		rhead, _ := json.Marshal(respHead)
 		respHeadMap := map[string]interface{}{}
 		json.Unmarshal(rhead, &respHeadMap)
@@ -791,6 +892,18 @@ func (b *GLBrowser) BindLuaState(s *lua.LState, recordId string) {
 		l.Push(lua.LString(publishtime))
 		return 1
 	}))
+	//截屏功能
+	s.SetGlobal("browser_analyzecode_screenshot", s.NewFunction(func(l *lua.LState) int {
+		tabTitle := l.ToString(-6)
+		tabUrl := l.ToString(-5)
+		stype := l.ToString(-4)
+		timeout := l.ToInt64(-3)
+		selectorType := l.ToInt(-2)
+		selector := l.ToString(-1)
+		code, _ := b.AnalyzeCodeScreenShot(tabTitle, tabUrl, selector, selectorType, timeout, stype)
+		l.Push(lua.LString(code))
+		return 1
+	}))
 	//保存数据
 	s.SetGlobal("browser_savedata", s.NewFunction(func(l *lua.LState) int {
 		//fmt.Println("---browser_savedata---")

+ 1 - 0
qianlima.go

@@ -59,6 +59,7 @@ func (a *App) QlmDetailDataDownload(param map[string]interface{}, record map[str
 			if detailScript != "" {
 				script.Datas = []map[string]interface{}{}
 				getData(nil, qu.ObjToString(record["recordid"]), "json", "download", &script.Datas)
+				qu.Debug("获取待采数据量:", len(script.Datas))
 				if len(script.Datas) > 0 {
 					r.Err = 1
 					go DownloadData(record, detailScript, page) //下载