|
@@ -1,25 +1,29 @@
|
|
package script
|
|
package script
|
|
|
|
|
|
import (
|
|
import (
|
|
|
|
+ "bytes"
|
|
"context"
|
|
"context"
|
|
|
|
+ "crypto/tls"
|
|
|
|
+ "encoding/json"
|
|
"errors"
|
|
"errors"
|
|
"fmt"
|
|
"fmt"
|
|
|
|
+ "github.com/chromedp/cdproto/browser"
|
|
|
|
+ "github.com/chromedp/cdproto/network"
|
|
|
|
+ "github.com/chromedp/cdproto/page"
|
|
|
|
+ "github.com/chromedp/chromedp"
|
|
|
|
+ "github.com/imroc/req/v3"
|
|
|
|
+ "github.com/yuin/gopher-lua"
|
|
"github.com/yuin/gopher-lua/parse"
|
|
"github.com/yuin/gopher-lua/parse"
|
|
qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
|
|
+ "net/http"
|
|
"net/url"
|
|
"net/url"
|
|
"os"
|
|
"os"
|
|
"path/filepath"
|
|
"path/filepath"
|
|
"spider_creator/backend"
|
|
"spider_creator/backend"
|
|
|
|
+ be "spider_creator/backend"
|
|
"strconv"
|
|
"strconv"
|
|
"strings"
|
|
"strings"
|
|
"time"
|
|
"time"
|
|
-
|
|
|
|
- "github.com/chromedp/cdproto/browser"
|
|
|
|
- "github.com/chromedp/cdproto/network"
|
|
|
|
- "github.com/chromedp/cdproto/page"
|
|
|
|
- "github.com/chromedp/chromedp"
|
|
|
|
- "github.com/yuin/gopher-lua"
|
|
|
|
- be "spider_creator/backend"
|
|
|
|
)
|
|
)
|
|
|
|
|
|
const (
|
|
const (
|
|
@@ -423,6 +427,166 @@ func (b *GLBrowser) DownloadFile(tabTitle, tabUrl string, timeout int64, selecto
|
|
act)
|
|
act)
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+func (b *GLBrowser) AnalysisCode(path, stype, head, cookie string, proxy bool) (code string, rh http.Header, rc []*http.Cookie) {
|
|
|
|
+ //先用免费,为识别再用收费
|
|
|
|
+ ok := false
|
|
|
|
+ code, rh, rc, _, ok = getCodeByFree(path, stype, head, cookie, proxy) //自己的服务
|
|
|
|
+ qu.Debug("Get Code By Free Result:", path, ok, code)
|
|
|
|
+ if qu.IntAll(stype) > 0 && !ok {
|
|
|
|
+ code, rh, rc = getCodeByPay(path, stype, head, cookie, proxy) //超级鹰收费
|
|
|
|
+ }
|
|
|
|
+ return
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+func getCodeByFree(path, stype, head, cookie string, proxy bool) (code string, respheader http.Header, respcookie []*http.Cookie, getCodeResp *req.Response, ok bool) {
|
|
|
|
+ defer qu.Catch()
|
|
|
|
+ client := req.C().
|
|
|
|
+ SetTimeout(time.Duration(be.Cfg.ServerCodeTimeOut) * time.Second).
|
|
|
|
+ SetTLSClientConfig(&tls.Config{
|
|
|
|
+ Renegotiation: tls.RenegotiateOnceAsClient,
|
|
|
|
+ InsecureSkipVerify: true,
|
|
|
|
+ }) //忽略证书验证
|
|
|
|
+ headers := map[string]string{}
|
|
|
|
+ if head != "" {
|
|
|
|
+ json.Unmarshal([]byte(head), &headers)
|
|
|
|
+ }
|
|
|
|
+ cookies := []*http.Cookie{}
|
|
|
|
+ if cookie != "" {
|
|
|
|
+ json.Unmarshal([]byte(cookie), &cookies)
|
|
|
|
+ }
|
|
|
|
+ for times := 1; times <= 6; times++ { //重试三次
|
|
|
|
+ if times > 2 || proxy { //重试第4次开始,使用代理ip
|
|
|
|
+ if stype == "-1" {
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+ proxyIp := be.GetProxyAddr() //获取代理地址
|
|
|
|
+ qu.Debug("proxy:", path, proxyIp)
|
|
|
|
+ client.SetProxyURL(proxyIp) //设置代理IP
|
|
|
|
+ }
|
|
|
|
+ request := client.R()
|
|
|
|
+ if len(headers) > 0 {
|
|
|
|
+ request.SetHeaders(headers)
|
|
|
|
+ }
|
|
|
|
+ if len(cookies) > 0 {
|
|
|
|
+ request.SetCookies(cookies...)
|
|
|
|
+ }
|
|
|
|
+ //下载验证码图片
|
|
|
|
+ var err error
|
|
|
|
+ var resultByte []byte
|
|
|
|
+ address := be.Cfg.ServerCodeFreeAddressOcr
|
|
|
|
+ if stype == "-1" { //传base64的图片
|
|
|
|
+ resultByte = []byte(path)
|
|
|
|
+ } else {
|
|
|
|
+ if stype == "6001" { //计算类验证码解析接口地址
|
|
|
|
+ address = be.Cfg.ServerCodeFreeAddressArithmetic
|
|
|
|
+ }
|
|
|
|
+ getCodeResp, err = request.Get(path) //通过请求图片地址返回的byte
|
|
|
|
+ resultByte = getCodeResp.Bytes()
|
|
|
|
+ }
|
|
|
|
+ if err != nil {
|
|
|
|
+ qu.Debug("Get Code By Path Error: ", path, err)
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ //解析验证码
|
|
|
|
+ codeResp, err := client.R().
|
|
|
|
+ SetHeader("accept", "application/json").
|
|
|
|
+ SetFileReader("file", "1", bytes.NewReader(resultByte)).
|
|
|
|
+ Post(address)
|
|
|
|
+ if err != nil {
|
|
|
|
+ qu.Debug("analysis code by path err: ", path, err)
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ yzmResult := map[string]interface{}{}
|
|
|
|
+ json.Unmarshal(codeResp.Bytes(), &yzmResult)
|
|
|
|
+ qu.Debug(path, yzmResult)
|
|
|
|
+ result := yzmResult["r"].(map[string]interface{})
|
|
|
|
+ yzm := fmt.Sprint(result["code"])
|
|
|
|
+ if yzm != "" {
|
|
|
|
+ if stype == "6001" || len(yzm) >= 4 {
|
|
|
|
+ code = yzm //长度小于4的视为识别错误
|
|
|
|
+ if getCodeResp != nil {
|
|
|
|
+ respheader = getCodeResp.Header
|
|
|
|
+ respcookie = getCodeResp.Cookies()
|
|
|
|
+ }
|
|
|
|
+ ok = true
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+func getCodeByPay(path, stype, head, cookie string, proxy bool) (code string, respheader http.Header, respcookie []*http.Cookie) {
|
|
|
|
+ defer qu.Catch()
|
|
|
|
+ client := req.C().
|
|
|
|
+ SetTimeout(time.Duration(be.Cfg.ServerCodeTimeOut) * time.Second).
|
|
|
|
+ SetTLSClientConfig(&tls.Config{
|
|
|
|
+ Renegotiation: tls.RenegotiateOnceAsClient,
|
|
|
|
+ InsecureSkipVerify: true,
|
|
|
|
+ }) //忽略证书验证
|
|
|
|
+ headers := map[string]string{}
|
|
|
|
+ if head != "" {
|
|
|
|
+ json.Unmarshal([]byte(head), &headers)
|
|
|
|
+ }
|
|
|
|
+ cookies := []*http.Cookie{}
|
|
|
|
+ if cookie != "" {
|
|
|
|
+ json.Unmarshal([]byte(cookie), &cookies)
|
|
|
|
+ }
|
|
|
|
+ for times := 1; times <= 2; times++ { //重试三次
|
|
|
|
+ //atomic.AddInt64(&PyTimes, 1)
|
|
|
|
+ if times > 1 || proxy { //重试第2次开始,使用代理ip
|
|
|
|
+ proxyIp := be.GetProxyAddr() //获取代理地址
|
|
|
|
+ qu.Debug("proxy:", path, proxyIp)
|
|
|
|
+ client.SetProxyURL(proxyIp) //设置代理IP
|
|
|
|
+ }
|
|
|
|
+ request := client.R()
|
|
|
|
+ if len(headers) > 0 {
|
|
|
|
+ request.SetHeaders(headers)
|
|
|
|
+ }
|
|
|
|
+ if len(cookies) > 0 {
|
|
|
|
+ request.SetCookies(cookies...)
|
|
|
|
+ }
|
|
|
|
+ //下载验证码图片
|
|
|
|
+ getCodeResp, err := request.Get(path)
|
|
|
|
+ //log.Println("respHeader---", getCodeResp.Header)
|
|
|
|
+ //log.Println("respCookie---", getCodeResp.Cookies())
|
|
|
|
+ if err != nil {
|
|
|
|
+ qu.Debug("Get Code By Path Error: ", path, err)
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ //解析验证码
|
|
|
|
+ data := map[string]string{
|
|
|
|
+ "grant_type": "",
|
|
|
|
+ "username": "jianyu001",
|
|
|
|
+ "password": "123qwe!A",
|
|
|
|
+ "scope": "",
|
|
|
|
+ "client_id": "",
|
|
|
|
+ "client_secret ": "",
|
|
|
|
+ }
|
|
|
|
+ codeResp, err := client.R().
|
|
|
|
+ SetHeader("accept", "application/json").
|
|
|
|
+ SetFileReader("file", "1", bytes.NewReader(getCodeResp.Bytes())).
|
|
|
|
+ SetFormData(data).
|
|
|
|
+ Post(be.Cfg.ServerCodeAddress + stype)
|
|
|
|
+ //SetFile("file", "C:/Users/topnet/Desktop/code.jpg").
|
|
|
|
+ // Post(spiderutil.Config.ServerCodeAddress)
|
|
|
|
+ if err != nil {
|
|
|
|
+ qu.Debug("analysis code by path err: ", path, err)
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ codeResult := map[string]interface{}{}
|
|
|
|
+ json.Unmarshal(codeResp.Bytes(), &codeResult)
|
|
|
|
+ qu.Debug("codeResult:", codeResult)
|
|
|
|
+ if yzm, ok := codeResult["r"].(map[string]interface{})["pic_str"].(string); ok && yzm != "" && len(yzm) >= 4 {
|
|
|
|
+ code = yzm
|
|
|
|
+ respheader = getCodeResp.Header
|
|
|
|
+ respcookie = getCodeResp.Cookies()
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return
|
|
|
|
+}
|
|
|
|
+
|
|
// BindLuaState
|
|
// BindLuaState
|
|
func (b *GLBrowser) BindLuaState(s *lua.LState, recordId string) {
|
|
func (b *GLBrowser) BindLuaState(s *lua.LState, recordId string) {
|
|
//执行暂停
|
|
//执行暂停
|
|
@@ -591,6 +755,35 @@ func (b *GLBrowser) BindLuaState(s *lua.LState, recordId string) {
|
|
}
|
|
}
|
|
return 1
|
|
return 1
|
|
}))
|
|
}))
|
|
|
|
+ s.SetGlobal("browser_analysiscode", s.NewFunction(func(S *lua.LState) int {
|
|
|
|
+ cookie := S.ToString(-1)
|
|
|
|
+ head := S.ToTable(-2)
|
|
|
|
+ stype := S.ToString(-3)
|
|
|
|
+ path := S.ToString(-4)
|
|
|
|
+ proxy := S.ToBool(-5)
|
|
|
|
+ headMap := TableToMap(head)
|
|
|
|
+ //qu.Debug("cookie----------", cookie)
|
|
|
|
+ //qu.Debug("headMap----------", headMap)
|
|
|
|
+ headJsonStr := ""
|
|
|
|
+ headByte, err := json.Marshal(headMap)
|
|
|
|
+ if err == nil {
|
|
|
|
+ headJsonStr = string(headByte)
|
|
|
|
+ }
|
|
|
|
+ code, respHead, respCookie := b.AnalysisCode(path, stype, headJsonStr, cookie, proxy)
|
|
|
|
+ rhead, _ := json.Marshal(respHead)
|
|
|
|
+ respHeadMap := map[string]interface{}{}
|
|
|
|
+ json.Unmarshal(rhead, &respHeadMap)
|
|
|
|
+ hTable := MapToTable(respHeadMap)
|
|
|
|
+
|
|
|
|
+ rcookie, _ := json.Marshal(respCookie)
|
|
|
|
+ respCookieMap := []map[string]interface{}{}
|
|
|
|
+ json.Unmarshal(rcookie, &respCookieMap)
|
|
|
|
+ cTable := MapToTable(map[string]interface{}{"cookie": respCookieMap})
|
|
|
|
+ S.Push(lua.LString(code))
|
|
|
|
+ S.Push(hTable)
|
|
|
|
+ S.Push(cTable.RawGetString("cookie"))
|
|
|
|
+ return 3
|
|
|
|
+ }))
|
|
//发布时间格式化
|
|
//发布时间格式化
|
|
s.SetGlobal("browser_publishtime", s.NewFunction(func(l *lua.LState) int {
|
|
s.SetGlobal("browser_publishtime", s.NewFunction(func(l *lua.LState) int {
|
|
text := l.ToString(-1)
|
|
text := l.ToString(-1)
|