12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940 |
- package script
- import (
- "bytes"
- "context"
- "crypto/tls"
- "encoding/json"
- "errors"
- "fmt"
- "github.com/chromedp/cdproto/browser"
- "github.com/chromedp/cdproto/network"
- "github.com/chromedp/cdproto/page"
- "github.com/chromedp/chromedp"
- "github.com/imroc/req/v3"
- "github.com/yuin/gopher-lua"
- "github.com/yuin/gopher-lua/parse"
- "io/ioutil"
- qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "net/http"
- "net/url"
- "os"
- "path/filepath"
- "spider_creator/backend"
- be "spider_creator/backend"
- "strconv"
- "strings"
- "time"
- )
// Selector kinds accepted by the Lua bindings; they pick the chromedp query
// option (ByID, ByQuery, BySearch, ByJSPath, ByQueryAll) used to find a node.
const (
	selector_type_id = iota
	selector_type_query
	selector_type_search
	selector_type_jspath
	selector_type_query_all
)

// Result shapes that browser_executejs can hand back to Lua.
const (
	execute_return_type_string = iota
	execute_return_type_list
	execute_return_type_table
)

// Script file names, appended to GLVm.ScriptDir by LoadScript.
const (
	qlm_list_lua   = "/qlm_list.lua"
	qlm_detail_lua = "/qlm_detail.lua"
)
// DataCache receives every row handed to the Lua function browser_savedata.
// It is buffered (2000 rows); a full buffer blocks the sending script.
var DataCache = make(chan map[string]interface{}, 2000)

// Datas is the queue of rows served, front first, by the Lua function
// browser_getdata.
var Datas []map[string]interface{}
- type GLVm struct {
- ScriptDir string
- LogsDir string
- LogsFile *os.File
- Dnf backend.EventNotifyFace
- Headless bool
- ShowImage bool
- ProxyServer bool
- ProxyAddr string
- B *GLBrowser
- ScriptRunning bool //控制一次只能执行一个脚本
- DataSaveOver chan bool
- }
// GLBrowser is the handle for one browser session: the chromedp context that
// every tab operation derives from, and the function that tears it down.
type GLBrowser struct {
	// Ctx is the parent context for all tab contexts; nil when no browser is
	// attached.
	Ctx context.Context
	// CancelFn cancels Ctx.
	CancelFn context.CancelFunc
}
- func NewGLVM(scriptDir, logsDir string, dnf be.EventNotifyFace) *GLVm {
- return &GLVm{
- ScriptDir: scriptDir,
- LogsDir: logsDir,
- Dnf: dnf,
- DataSaveOver: make(chan bool, 1),
- }
- }
- // LoadScript 加载脚本
- func (glvm *GLVm) LoadScript(page string) string {
- var path string
- if page == "list" {
- path = glvm.ScriptDir + qlm_list_lua
- } else if page == "detail" {
- path = glvm.ScriptDir + qlm_detail_lua
- }
- bs, err := os.ReadFile(path)
- if err != nil {
- qu.Debug(path, "脚本加载失败...")
- }
- return string(bs)
- }
- // RunScript 执行lua代码
- func (glvm *GLVm) RunScript(script, recordId string) error {
- defer qu.Catch()
- var s *lua.LState = lua.NewState()
- defer s.Close()
- //日志文件
- now := time.Now()
- path := glvm.LogsDir + fmt.Sprintf("/%s.log", qu.FormatDate(&now, qu.Date_Short_Layout))
- qu.Debug("log path:", path)
- file, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0666)
- if err != nil {
- qu.Debug("日志创建失败:", err)
- return err
- }
- glvm.LogsFile = file
- defer glvm.LogsFile.Close()
- //方法绑定
- glvm.ResetBrowser() //先创建浏览器对象
- glvm.BindLuaState(s) //绑定虚拟机函数
- glvm.B.BindLuaState(s, recordId)
- defer func() {
- if b := glvm.B; b != nil {
- b.CancelFn()
- b.Ctx = nil
- b.CancelFn = nil
- b = nil
- }
- }()
- reader := strings.NewReader(script)
- chunk, err := parse.Parse(reader, "code")
- if err != nil {
- return err
- }
- proto, err := lua.Compile(chunk, script)
- if err != nil {
- return err
- }
- lfunc := s.NewFunctionFromProto(proto)
- s.Push(lfunc)
- s.Call(0, 0)
- return nil
- }
- // ResetBrowser 重置浏览器
- func (glvm *GLVm) ResetBrowser() {
- if glvm.B != nil && glvm.B.CancelFn != nil {
- glvm.B.CancelFn()
- glvm.B.Ctx = nil
- glvm.B.CancelFn = nil
- }
- _, _, _, _, ctx, incCancelFn := backend.NewBrowser(glvm.Headless, glvm.ShowImage, glvm.ProxyServer, "http://")
- b := &GLBrowser{
- Ctx: ctx,
- CancelFn: incCancelFn,
- }
- if glvm.B == nil {
- glvm.B = b
- } else {
- glvm.B.Ctx, glvm.B.CancelFn = b.Ctx, b.CancelFn
- }
- }
- // BindLuaState 绑定虚拟机函数
- func (glvm *GLVm) BindLuaState(s *lua.LState) {
- s.SetGlobal("browser_reset", s.NewFunction(func(l *lua.LState) int {
- glvm.ResetBrowser()
- return 0
- }))
- s.SetGlobal("browser_savelog", s.NewFunction(func(l *lua.LState) int {
- text := l.ToString(-1)
- qu.Debug("log:", text)
- now := time.Now()
- glvm.LogsFile.Write([]byte(fmt.Sprintf("%s%s%s%s", qu.FormatDate(&now, qu.Date_Full_Layout), "---", text, "\n")))
- return 0
- }))
- }
- func (glvm *GLVm) CloseTabs() {
- if glvm.B != nil && glvm.B.CancelFn != nil {
- glvm.B.CancelFn()
- glvm.B.Ctx = nil
- glvm.B.CancelFn = nil
- glvm.B = nil
- }
- }
- // findTab 根据标题、url找tab
- func (b *GLBrowser) findTabContext(tabTitle, tabUrl string, timeoutInt64 int64) (ctx context.Context, err error) {
- if b.Ctx != nil {
- if timeoutInt64 == 0 {
- timeoutInt64 = 5000
- }
- timeout := time.Duration(timeoutInt64) * time.Millisecond
- if tabTitle == "" && tabUrl == "" {
- ctx, _ = context.WithTimeout(b.Ctx, timeout)
- return ctx, nil
- } else {
- ts, err := chromedp.Targets(b.Ctx)
- if err != nil {
- return nil, err
- }
- for _, t := range ts {
- if (tabTitle != "" && strings.Contains(t.Title, tabTitle)) || (tabUrl != "" && strings.Contains(t.URL, tabUrl)) {
- // log.Printf("find tab param<title,url>: %s %s found %s %s", tabTitle, tabUrl,
- // t.Title, t.URL)
- newCtx, _ := chromedp.NewContext(b.Ctx, chromedp.WithTargetID(t.TargetID))
- ctx, _ = context.WithTimeout(newCtx, timeout)
- return ctx, nil
- }
- }
- }
- return nil, errors.New("can't find tab")
- }
- return nil, errors.New("context is error")
- }
- // CloseTabs 关闭页面
- func (b *GLBrowser) CloseTabs(tabTitle, tabUrl string, timeoutInt64 int64) (err error) {
- if timeoutInt64 == 0 {
- timeoutInt64 = 5
- }
- timeout := time.Duration(timeoutInt64) * time.Millisecond
- ts, err := chromedp.Targets(b.Ctx)
- if err != nil {
- return err
- }
- for _, t := range ts {
- if (tabTitle != "" && strings.Contains(t.Title, tabTitle)) || (tabUrl != "" && strings.Contains(t.URL, tabUrl)) {
- newCtx, _ := chromedp.NewContext(b.Ctx, chromedp.WithTargetID(t.TargetID))
- ctx, _ := context.WithTimeout(newCtx, timeout)
- chromedp.Run(
- ctx,
- page.Close(),
- )
- }
- }
- return nil
- }
- // Navigate 导航到指定网址
- func (b *GLBrowser) Navigate(tabTitle string, tabUrl string, isNewTab bool, targetUrl string, timeout int64) (err error) {
- ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
- if err != nil {
- return err
- }
- //新标签页
- if isNewTab {
- ctx, _ = chromedp.NewContext(ctx)
- }
- //
- return chromedp.Run(ctx,
- chromedp.Navigate(targetUrl))
- }
- // Navigate 导航到指定网址,并保存请求资源,如图片等
- func (b *GLBrowser) NavigateAndSaveRes(tabTitle string, tabUrl string, timeout int64, isNewTab bool, targetUrl string, saveFileTypeList, save2dir string) (err error) {
- ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
- if err != nil {
- return err
- }
- //新标签页
- if isNewTab {
- ctx, _ = chromedp.NewContext(ctx)
- }
- //
- saveFileType := strings.Split(saveFileTypeList, " ")
- isNeedRes := func(fileType string) bool {
- for _, v := range saveFileType {
- if strings.Contains(fileType, v) {
- return true
- }
- }
- return false
- }
- fnURL2FileName := func(requestURL string) string {
- u, err := url.Parse(requestURL)
- if err != nil {
- return ""
- }
- _, filename := filepath.Split(u.Path)
- return filename
- }
- var cache = map[network.RequestID]string{}
- chromedp.ListenTarget(ctx, func(v interface{}) {
- switch ev := v.(type) {
- case *network.EventRequestWillBeSent: //准备下载
- cache[ev.RequestID] = ev.Request.URL
- case *network.EventResponseReceived: //检查回应头的contenttype
- contentType, _ := ev.Response.Headers["Content-Type"].(string)
- fmt.Println(contentType)
- if !isNeedRes(contentType) {
- delete(cache, ev.RequestID)
- }
- case *network.EventLoadingFinished: //下载完成
- if uri, ok := cache[ev.RequestID]; ok {
- filename := fnURL2FileName(uri)
- fmt.Println("save2file", filename)
- if filename != "" {
- filePath := filepath.Join(save2dir, filename)
- var buf []byte
- if err := chromedp.Run(ctx, chromedp.ActionFunc(func(ctx context.Context) error {
- var err error
- buf, err = network.GetResponseBody(ev.RequestID).Do(ctx)
- return err
- })); err == nil {
- os.WriteFile(filePath, buf, 0777)
- } else {
- fmt.Println(err.Error())
- }
- }
- }
- }
- })
- //
- err = chromedp.Run(ctx,
- chromedp.Navigate(targetUrl))
- //下载存储
- return err
- }
- // ExecuteJS 执行脚本
- func (b *GLBrowser) ExecuteJS(tabTitle, tabUrl, script string, ret interface{}, timeout int64) (err error) {
- ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
- if err != nil {
- return err
- }
- return chromedp.Run(ctx,
- chromedp.Evaluate(script, ret))
- }
- // Click 点击
- func (b *GLBrowser) Click(tabTitle, tabUrl, selector string, selectorType int, timeout int64) (err error) {
- ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
- if err != nil {
- return err
- }
- var act chromedp.QueryAction
- switch selectorType {
- case selector_type_id:
- act = chromedp.Click(selector, chromedp.ByID)
- case selector_type_query:
- act = chromedp.Click(selector, chromedp.ByQuery)
- case selector_type_search:
- act = chromedp.Click(selector, chromedp.BySearch)
- case selector_type_jspath:
- act = chromedp.Click(selector, chromedp.ByJSPath)
- default:
- act = chromedp.Click(selector, chromedp.ByQueryAll)
- }
- err = chromedp.Run(ctx,
- act)
- return err
- }
- // KeySend 键盘输入
- func (b *GLBrowser) KeySend(tabTitle, tabUrl, selector, sendStr string, selectorType int, timeout int64) (err error) {
- ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
- if err != nil {
- return err
- }
- var act chromedp.QueryAction
- switch selectorType {
- case selector_type_id:
- act = chromedp.SendKeys(selector, sendStr, chromedp.ByID)
- case selector_type_query:
- act = chromedp.SendKeys(selector, sendStr, chromedp.ByQuery)
- case selector_type_search:
- act = chromedp.SendKeys(selector, sendStr, chromedp.BySearch)
- case selector_type_jspath:
- act = chromedp.SendKeys(selector, sendStr, chromedp.ByJSPath)
- default:
- act = chromedp.SendKeys(selector, sendStr, chromedp.ByQueryAll)
- }
- return chromedp.Run(ctx,
- act)
- }
- // WaitVisible 等待元素可见
- func (b *GLBrowser) WaitVisible(tabTitle, tabUrl, selector string, selectorType int, timeout int64) error {
- ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
- if err != nil {
- return err
- }
- var act chromedp.QueryAction
- switch selectorType {
- case selector_type_id:
- act = chromedp.WaitVisible(selector, chromedp.ByID)
- case selector_type_query:
- act = chromedp.WaitVisible(selector, chromedp.ByQuery)
- case selector_type_search:
- act = chromedp.WaitVisible(selector, chromedp.BySearch)
- case selector_type_jspath:
- act = chromedp.WaitVisible(selector, chromedp.ByJSPath)
- default:
- act = chromedp.WaitVisible(selector, chromedp.ByQueryAll)
- }
- return chromedp.Run(ctx,
- act)
- }
- // Reset is intended to reset the browser; the body is empty, so it currently does nothing (GLVm.ResetBrowser does the actual restart).
- func (b *GLBrowser) Reset() {
- }
- // DownloadFile 只有在非headless模式下有效,与click方法其实是一致的
- func (b *GLBrowser) DownloadFile(tabTitle, tabUrl string, timeout int64, selector string, selectorType int, save2dir string) error {
- ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
- if err != nil {
- return err
- }
- var act chromedp.QueryAction
- switch selectorType {
- case selector_type_id:
- act = chromedp.Click(selector, chromedp.ByID)
- case selector_type_query:
- act = chromedp.Click(selector, chromedp.ByQuery)
- case selector_type_search:
- act = chromedp.Click(selector, chromedp.BySearch)
- case selector_type_jspath:
- act = chromedp.Click(selector, chromedp.ByJSPath)
- default:
- act = chromedp.Click(selector, chromedp.ByQueryAll)
- }
- return chromedp.Run(ctx,
- browser.SetDownloadBehavior(browser.SetDownloadBehaviorBehaviorAllowAndName).WithDownloadPath(save2dir).WithEventsEnabled(true),
- act)
- }
- func (b *GLBrowser) AnalyzeCodeByPath(path, stype, head, cookie string, proxy bool) (code string, rh http.Header, rc []*http.Cookie) {
- //先用免费,为识别再用收费
- ok := false
- code, rh, rc, _, ok = getCodeByFree(path, stype, head, cookie, proxy) //自己的服务
- qu.Debug("Get Code By Free Result:", path, ok, code)
- if qu.IntAll(stype) > 0 && !ok {
- code, rh, rc = getCodeByPay(path, stype, head, cookie, proxy) //超级鹰收费
- }
- return
- }
- func getCodeByFree(path, stype, head, cookie string, proxy bool) (code string, respheader http.Header, respcookie []*http.Cookie, getCodeResp *req.Response, ok bool) {
- defer qu.Catch()
- client := req.C().
- SetTimeout(time.Duration(be.Cfg.ServerCodeTimeOut) * time.Second).
- SetTLSClientConfig(&tls.Config{
- Renegotiation: tls.RenegotiateOnceAsClient,
- InsecureSkipVerify: true,
- }) //忽略证书验证
- headers := map[string]string{}
- if head != "" {
- json.Unmarshal([]byte(head), &headers)
- }
- cookies := []*http.Cookie{}
- if cookie != "" {
- json.Unmarshal([]byte(cookie), &cookies)
- }
- for times := 1; times <= 6; times++ { //重试三次
- if times > 2 || proxy { //重试第4次开始,使用代理ip
- if stype == "-1" {
- return
- }
- proxyIp := be.GetProxyAddr() //获取代理地址
- qu.Debug("proxy:", path, proxyIp)
- client.SetProxyURL(proxyIp) //设置代理IP
- }
- request := client.R()
- if len(headers) > 0 {
- request.SetHeaders(headers)
- }
- if len(cookies) > 0 {
- request.SetCookies(cookies...)
- }
- //下载验证码图片
- var err error
- var resultByte []byte
- //address := be.Cfg.ServerCodeFreeAddressOcr
- if stype == "-1" { //传base64的图片
- resultByte = []byte(path)
- } else {
- if stype == "6001" { //计算类验证码解析接口地址
- //address = be.Cfg.ServerCodeFreeAddressArithmetic
- }
- getCodeResp, err = request.Get(path) //通过请求图片地址返回的byte
- resultByte = getCodeResp.Bytes()
- }
- if err != nil {
- qu.Debug("Get Code By Path Error: ", path, err)
- continue
- }
- code, err = getCode(resultByte, stype, true)
- if err == nil && code != "" {
- if getCodeResp != nil {
- respheader = getCodeResp.Header
- respcookie = getCodeResp.Cookies()
- }
- ok = true
- return
- }
- //解析验证码
- //codeResp, err := client.R().
- // SetHeader("accept", "application/json").
- // SetFileReader("file", "1", bytes.NewReader(resultByte)).
- // Post(address)
- //if err != nil {
- // qu.Debug("analysis code by path err: ", path, err)
- // continue
- //}
- //yzmResult := map[string]interface{}{}
- //json.Unmarshal(codeResp.Bytes(), &yzmResult)
- //qu.Debug(path, yzmResult)
- //if err != nil || yzmResult == nil {
- // continue
- //}
- //result := yzmResult["r"].(map[string]interface{})
- //yzm := fmt.Sprint(result["code"])
- //if yzm != "" {
- // if stype == "6001" || len(yzm) >= 4 {
- // code = yzm //长度小于4的视为识别错误
- // if getCodeResp != nil {
- // respheader = getCodeResp.Header
- // respcookie = getCodeResp.Cookies()
- // }
- // ok = true
- // return
- // }
- //}
- }
- return
- }
- func getCodeByPay(path, stype, head, cookie string, proxy bool) (code string, respheader http.Header, respcookie []*http.Cookie) {
- defer qu.Catch()
- client := req.C().
- SetTimeout(time.Duration(be.Cfg.ServerCodeTimeOut) * time.Second).
- SetTLSClientConfig(&tls.Config{
- Renegotiation: tls.RenegotiateOnceAsClient,
- InsecureSkipVerify: true,
- }) //忽略证书验证
- headers := map[string]string{}
- if head != "" {
- json.Unmarshal([]byte(head), &headers)
- }
- cookies := []*http.Cookie{}
- if cookie != "" {
- json.Unmarshal([]byte(cookie), &cookies)
- }
- for times := 1; times <= 2; times++ { //重试三次
- //atomic.AddInt64(&PyTimes, 1)
- if times > 1 || proxy { //重试第2次开始,使用代理ip
- proxyIp := be.GetProxyAddr() //获取代理地址
- qu.Debug("proxy:", path, proxyIp)
- client.SetProxyURL(proxyIp) //设置代理IP
- }
- request := client.R()
- if len(headers) > 0 {
- request.SetHeaders(headers)
- }
- if len(cookies) > 0 {
- request.SetCookies(cookies...)
- }
- //下载验证码图片
- getCodeResp, err := request.Get(path)
- //log.Println("respHeader---", getCodeResp.Header)
- //log.Println("respCookie---", getCodeResp.Cookies())
- if err != nil {
- qu.Debug("Get Code By Path Error: ", path, err)
- continue
- }
- code, err = getCode(getCodeResp.Bytes(), stype, false)
- if err == nil && code != "" {
- respheader = getCodeResp.Header
- respcookie = getCodeResp.Cookies()
- return
- }
- //解析验证码
- //data := map[string]string{
- // "grant_type": "",
- // "username": "jianyu001",
- // "password": "123qwe!A",
- // "scope": "",
- // "client_id": "",
- // "client_secret ": "",
- //}
- //codeResp, err := client.R().
- // SetHeader("accept", "application/json").
- // SetFileReader("file", "1", bytes.NewReader(getCodeResp.Bytes())).
- // SetFormData(data).
- // Post(be.Cfg.ServerCodeAddress + stype)
- //if err != nil {
- // qu.Debug("analysis code by path err: ", path, err)
- // continue
- //}
- //codeResult := map[string]interface{}{}
- //json.Unmarshal(codeResp.Bytes(), &codeResult)
- //qu.Debug("codeResult:", codeResult)
- //qu.Debug("codeResult:", result)
- //if err != nil || result == nil {
- // continue
- //}
- //if yzm, ok := result["r"].(map[string]interface{})["pic_str"].(string); ok && yzm != "" && len(yzm) >= 4 {
- // code = yzm
- // respheader = getCodeResp.Header
- // respcookie = getCodeResp.Cookies()
- // return
- //}
- }
- return
- }
- func getCode(b []byte, stype string, free bool) (code string, err error) {
- qu.Debug("验证码类型:", stype, ",是否免费:", free)
- //解析验证码
- request := req.C().R().
- SetHeader("accept", "application/json").
- SetFileReader("file", "1", bytes.NewReader(b))
- address := be.Cfg.ServerCodeFreeAddressOcr
- if !free {
- data := map[string]string{
- "grant_type": "",
- "username": be.Cfg.Username,
- "password": be.Cfg.Password,
- "scope": "",
- "client_id": "",
- "client_secret ": "",
- }
- request.SetFormData(data)
- address = be.Cfg.ServerCodeAddress + stype
- } else if stype == "6001" { //计算类验证码解析接口地址
- address = be.Cfg.ServerCodeFreeAddressArithmetic
- }
- qu.Debug("address:", address)
- var resp *req.Response
- resp, err = request.Post(address)
- if err != nil {
- qu.Debug("analysis code by path err: ", err)
- return
- }
- var result map[string]interface{}
- err = json.Unmarshal(resp.Bytes(), &result)
- qu.Debug("验证码解析结果:", free, result)
- if err == nil && result != nil {
- if free {
- r, _ := result["r"].(map[string]interface{})
- codeTmp := qu.ObjToString(r["code"])
- if len(codeTmp) >= 4 || stype == "6001" && codeTmp != "" {
- return codeTmp, nil
- }
- } else {
- if codeTmp, ok := result["r"].(map[string]interface{})["pic_str"].(string); ok && codeTmp != "" {
- if stype == "6001" || len(codeTmp) >= 4 {
- return codeTmp, nil
- }
- }
- }
- }
- return
- }
- // AnalyzeCodeScreenShot 截屏解析验证码
- func (b *GLBrowser) AnalyzeCodeScreenShot(tabTitle, tabUrl, selector string, selectorType int, timeout int64, stype string) (code string, err error) {
- ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
- if err != nil {
- return
- }
- var act chromedp.QueryAction
- var bt []byte
- switch selectorType {
- case selector_type_id:
- act = chromedp.Screenshot(selector, &bt, chromedp.ByID)
- case selector_type_query:
- act = chromedp.Screenshot(selector, &bt, chromedp.ByQuery)
- //case selector_type_search:
- //case selector_type_jspath:
- default:
- //option = chromedp.ByQueryAll
- chromedp.Screenshot(selector, &bt, chromedp.ByQueryAll)
- }
- err = chromedp.Run(ctx,
- act,
- )
- //保存
- if err = ioutil.WriteFile("code.png", bt, 0755); err != nil {
- qu.Debug(err)
- }
- code, err = getCode(bt, stype, true) //免费
- if err != nil || code == "" {
- code, err = getCode(bt, stype, false) //收费
- }
- return
- }
- // BindLuaState
- func (b *GLBrowser) BindLuaState(s *lua.LState, recordId string) {
- //执行暂停
- s.SetGlobal("browser_sleep", s.NewFunction(func(l *lua.LState) int {
- fmt.Println("---browser_sleep---")
- timeout := l.ToInt64(-1)
- if timeout == 0 {
- timeout = 5
- }
- time.Sleep(time.Duration(timeout) * time.Millisecond)
- return 0
- }))
- //关闭tabl页
- s.SetGlobal("browser_closetabs", s.NewFunction(func(l *lua.LState) int {
- fmt.Println("---browser_closetabs---")
- timeout := l.ToInt64(-3)
- tabTitle := l.ToString(-2)
- tabUrl := l.ToString(-1)
- if timeout == 0 {
- timeout = 5
- }
- b.CloseTabs(tabTitle, tabUrl, timeout)
- return 0
- }))
- //注册打开地址
- s.SetGlobal("browser_navagite", s.NewFunction(func(l *lua.LState) int {
- fmt.Println("---browser_navagite---")
- tabTitle := l.ToString(-5) //指定标签页title
- tabUrl := l.ToString(-4) //指定标签页url
- isNewTab := l.ToBool(-3) //是否打开新的标签页
- timeout := l.ToInt64(-2) //网页打开的超时时间
- targetUrl := l.ToString(-1) //打开网页的链接
- if err := b.Navigate(tabTitle, tabUrl, isNewTab, targetUrl, timeout); err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- //执行浏览器端js
- s.SetGlobal("browser_executejs", s.NewFunction(func(l *lua.LState) int {
- fmt.Println("---browser_executejs---")
- tabTitle := l.ToString(-5)
- tabUrl := l.ToString(-4)
- timeout := l.ToInt64(-3)
- returnType := l.ToInt(-2) //返回数据类型
- script := l.ToString(-1) //执行的js
- switch returnType {
- case execute_return_type_string: //返回string
- var ret string
- if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout); err == nil {
- l.Push(lua.LString("ok"))
- l.Push(lua.LString(ret))
- } else {
- l.Push(lua.LString("err"))
- l.Push(lua.LString(err.Error()))
- }
- case execute_return_type_list: //返回list
- var ret = make([]interface{}, 0, 0)
- var tmp = make(map[string]interface{})
- if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout); err == nil {
- for i, v := range ret {
- tmp[strconv.Itoa(i)] = v
- }
- l.Push(lua.LString("ok"))
- l.Push(MapToTable(tmp))
- } else {
- l.Push(lua.LString("err"))
- l.Push(lua.LString(err.Error()))
- }
- case execute_return_type_table: //返回table
- var ret = make(map[string]interface{})
- if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout); err == nil {
- l.Push(lua.LString("ok"))
- l.Push(MapToTable(ret))
- } else {
- l.Push(lua.LString("err"))
- l.Push(lua.LString(err.Error()))
- }
- }
- return 2
- }))
- //按键
- s.SetGlobal("browser_keysend", s.NewFunction(func(l *lua.LState) int {
- fmt.Println("---browser_keysend---")
- tabTitle := l.ToString(-6)
- tabUrl := l.ToString(-5)
- timeout := l.ToInt64(-4)
- words := l.ToString(-3)
- selectorType := l.ToInt(-2)
- selector := l.ToString(-1)
- fmt.Println(selector, words, selectorType, timeout)
- err := b.KeySend(tabTitle, tabUrl, selector, words, selectorType, timeout)
- if err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- //点击
- s.SetGlobal("browser_click", s.NewFunction(func(l *lua.LState) int {
- fmt.Println("---browser_click---")
- tabTitle := l.ToString(-5)
- tabUrl := l.ToString(-4)
- timeout := l.ToInt64(-3)
- selectorType := l.ToInt(-2)
- selector := l.ToString(-1)
- err := b.Click(tabTitle, tabUrl, selector, selectorType, timeout)
- if err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- //等待元素加载
- s.SetGlobal("browser_waitvisible", s.NewFunction(func(l *lua.LState) int {
- fmt.Println("---browser_waitvisible---")
- tabTitle := l.ToString(-5)
- tabUrl := l.ToString(-4)
- timeout := l.ToInt64(-3)
- selectorType := l.ToInt(-2) //选择器类型
- selector := l.ToString(-1) //选择器
- err := b.WaitVisible(tabTitle, tabUrl, selector, selectorType, timeout)
- if err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- //下载附件
- s.SetGlobal("browser_downloadfile", s.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-6)
- tabUrl := l.ToString(-5)
- timeout := l.ToInt64(-4)
- selectorType := l.ToInt(-3)
- selector := l.ToString(-2)
- save2dir := l.ToString(-1)
- err := b.DownloadFile(tabTitle, tabUrl, timeout, selector, selectorType, save2dir)
- if err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- //注册打开地址
- s.SetGlobal("browser_navagite_download_res", s.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-7)
- tabUrl := l.ToString(-6)
- timeout := l.ToInt64(-5)
- isNewTab := l.ToBool(-4)
- targetUrl := l.ToString(-3)
- saveFileTypeList := l.ToString(-2)
- savedir := l.ToString(-1)
- if err := b.NavigateAndSaveRes(tabTitle, tabUrl, timeout, isNewTab, targetUrl, saveFileTypeList, savedir); err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- //s.SetGlobal("browser_analyzecode_bypath", s.NewFunction(func(S *lua.LState) int {
- // proxy := S.ToBool(-5)
- // url := S.ToString(-4)
- // stype := S.ToString(-3)
- // head := S.ToTable(-2)
- // cookie := S.ToString(-1)
- // headMap := TableToMap(head)
- // //qu.Debug("cookie----------", cookie)
- // //qu.Debug("headMap----------", headMap)
- // headJsonStr := ""
- // headByte, err := json.Marshal(headMap)
- // if err == nil {
- // headJsonStr = string(headByte)
- // }
- // code, respHead, respCookie := b.AnalyzeCodeByPath(url, stype, headJsonStr, cookie, proxy)
- // rhead, _ := json.Marshal(respHead)
- // respHeadMap := map[string]interface{}{}
- // json.Unmarshal(rhead, &respHeadMap)
- // hTable := MapToTable(respHeadMap)
- //
- // rcookie, _ := json.Marshal(respCookie)
- // respCookieMap := []map[string]interface{}{}
- // json.Unmarshal(rcookie, &respCookieMap)
- // cTable := MapToTable(map[string]interface{}{"cookie": respCookieMap})
- // S.Push(lua.LString(code))
- // S.Push(hTable)
- // S.Push(cTable.RawGetString("cookie"))
- // return 3
- //}))
- //发布时间格式化
- s.SetGlobal("browser_publishtime", s.NewFunction(func(l *lua.LState) int {
- text := l.ToString(-1)
- publishtime := getPublitime(text)
- l.Push(lua.LString(publishtime))
- return 1
- }))
- //截屏功能
- s.SetGlobal("browser_analyzecode_screenshot", s.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-6)
- tabUrl := l.ToString(-5)
- stype := l.ToString(-4)
- timeout := l.ToInt64(-3)
- selectorType := l.ToInt(-2)
- selector := l.ToString(-1)
- code, _ := b.AnalyzeCodeScreenShot(tabTitle, tabUrl, selector, selectorType, timeout, stype)
- l.Push(lua.LString(code))
- return 1
- }))
- //保存数据
- s.SetGlobal("browser_savedata", s.NewFunction(func(l *lua.LState) int {
- //fmt.Println("---browser_savedata---")
- pageType := l.ToString(-2)
- data := l.ToTable(-1)
- result := TableToMap(data)
- if pageType == "list" {
- result["recordid"] = recordId
- }
- DataCache <- result
- return 0
- }))
- //获取数据
- s.SetGlobal("browser_getdata", s.NewFunction(func(l *lua.LState) int {
- fmt.Println("---browser_getdata---")
- num := l.ToInt(-1) //获取多少条数据
- count := len(Datas)
- if count == 0 {
- l.Push(lua.LString("err"))
- l.Push(lua.LString("当前可下载量为0"))
- } else {
- if count < num {
- num = count
- }
- data := Datas[:num]
- Datas = Datas[num:]
- tMap := MapToTable(map[string]interface{}{"data": data})
- l.Push(lua.LString("ok"))
- l.Push(tMap.RawGetString("data"))
- }
- return 2
- }))
- }
|