script.go 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940
  1. package script
  2. import (
  3. "bytes"
  4. "context"
  5. "crypto/tls"
  6. "encoding/json"
  7. "errors"
  8. "fmt"
  9. "github.com/chromedp/cdproto/browser"
  10. "github.com/chromedp/cdproto/network"
  11. "github.com/chromedp/cdproto/page"
  12. "github.com/chromedp/chromedp"
  13. "github.com/imroc/req/v3"
  14. "github.com/yuin/gopher-lua"
  15. "github.com/yuin/gopher-lua/parse"
  16. "io/ioutil"
  17. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  18. "net/http"
  19. "net/url"
  20. "os"
  21. "path/filepath"
  22. "spider_creator/backend"
  23. be "spider_creator/backend"
  24. "strconv"
  25. "strings"
  26. "time"
  27. )
  28. const (
  29. selector_type_id = 0
  30. selector_type_query = 1
  31. selector_type_search = 2
  32. selector_type_jspath = 3
  33. selector_type_query_all = 4
  34. execute_return_type_string = 0
  35. execute_return_type_list = 1
  36. execute_return_type_table = 2
  37. qlm_list_lua = "/qlm_list.lua"
  38. qlm_detail_lua = "/qlm_detail.lua"
  39. )
  40. var (
  41. DataCache = make(chan map[string]interface{}, 2000)
  42. Datas []map[string]interface{}
  43. )
  44. type GLVm struct {
  45. ScriptDir string
  46. LogsDir string
  47. LogsFile *os.File
  48. Dnf backend.EventNotifyFace
  49. Headless bool
  50. ShowImage bool
  51. ProxyServer bool
  52. ProxyAddr string
  53. B *GLBrowser
  54. ScriptRunning bool //控制一次只能执行一个脚本
  55. DataSaveOver chan bool
  56. }
  57. type GLBrowser struct {
  58. Ctx context.Context
  59. CancelFn context.CancelFunc
  60. }
  61. func NewGLVM(scriptDir, logsDir string, dnf be.EventNotifyFace) *GLVm {
  62. return &GLVm{
  63. ScriptDir: scriptDir,
  64. LogsDir: logsDir,
  65. Dnf: dnf,
  66. DataSaveOver: make(chan bool, 1),
  67. }
  68. }
  69. // LoadScript 加载脚本
  70. func (glvm *GLVm) LoadScript(page string) string {
  71. var path string
  72. if page == "list" {
  73. path = glvm.ScriptDir + qlm_list_lua
  74. } else if page == "detail" {
  75. path = glvm.ScriptDir + qlm_detail_lua
  76. }
  77. bs, err := os.ReadFile(path)
  78. if err != nil {
  79. qu.Debug(path, "脚本加载失败...")
  80. }
  81. return string(bs)
  82. }
  83. // RunScript 执行lua代码
  84. func (glvm *GLVm) RunScript(script, recordId string) error {
  85. defer qu.Catch()
  86. var s *lua.LState = lua.NewState()
  87. defer s.Close()
  88. //日志文件
  89. now := time.Now()
  90. path := glvm.LogsDir + fmt.Sprintf("/%s.log", qu.FormatDate(&now, qu.Date_Short_Layout))
  91. qu.Debug("log path:", path)
  92. file, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0666)
  93. if err != nil {
  94. qu.Debug("日志创建失败:", err)
  95. return err
  96. }
  97. glvm.LogsFile = file
  98. defer glvm.LogsFile.Close()
  99. //方法绑定
  100. glvm.ResetBrowser() //先创建浏览器对象
  101. glvm.BindLuaState(s) //绑定虚拟机函数
  102. glvm.B.BindLuaState(s, recordId)
  103. defer func() {
  104. if b := glvm.B; b != nil {
  105. b.CancelFn()
  106. b.Ctx = nil
  107. b.CancelFn = nil
  108. b = nil
  109. }
  110. }()
  111. reader := strings.NewReader(script)
  112. chunk, err := parse.Parse(reader, "code")
  113. if err != nil {
  114. return err
  115. }
  116. proto, err := lua.Compile(chunk, script)
  117. if err != nil {
  118. return err
  119. }
  120. lfunc := s.NewFunctionFromProto(proto)
  121. s.Push(lfunc)
  122. s.Call(0, 0)
  123. return nil
  124. }
  125. // ResetBrowser 重置浏览器
  126. func (glvm *GLVm) ResetBrowser() {
  127. if glvm.B != nil && glvm.B.CancelFn != nil {
  128. glvm.B.CancelFn()
  129. glvm.B.Ctx = nil
  130. glvm.B.CancelFn = nil
  131. }
  132. _, _, _, _, ctx, incCancelFn := backend.NewBrowser(glvm.Headless, glvm.ShowImage, glvm.ProxyServer, "http://")
  133. b := &GLBrowser{
  134. Ctx: ctx,
  135. CancelFn: incCancelFn,
  136. }
  137. if glvm.B == nil {
  138. glvm.B = b
  139. } else {
  140. glvm.B.Ctx, glvm.B.CancelFn = b.Ctx, b.CancelFn
  141. }
  142. }
  143. // BindLuaState 绑定虚拟机函数
  144. func (glvm *GLVm) BindLuaState(s *lua.LState) {
  145. s.SetGlobal("browser_reset", s.NewFunction(func(l *lua.LState) int {
  146. glvm.ResetBrowser()
  147. return 0
  148. }))
  149. s.SetGlobal("browser_savelog", s.NewFunction(func(l *lua.LState) int {
  150. text := l.ToString(-1)
  151. qu.Debug("log:", text)
  152. now := time.Now()
  153. glvm.LogsFile.Write([]byte(fmt.Sprintf("%s%s%s%s", qu.FormatDate(&now, qu.Date_Full_Layout), "---", text, "\n")))
  154. return 0
  155. }))
  156. }
  157. func (glvm *GLVm) CloseTabs() {
  158. if glvm.B != nil && glvm.B.CancelFn != nil {
  159. glvm.B.CancelFn()
  160. glvm.B.Ctx = nil
  161. glvm.B.CancelFn = nil
  162. glvm.B = nil
  163. }
  164. }
  165. // findTab 根据标题、url找tab
  166. func (b *GLBrowser) findTabContext(tabTitle, tabUrl string, timeoutInt64 int64) (ctx context.Context, err error) {
  167. if b.Ctx != nil {
  168. if timeoutInt64 == 0 {
  169. timeoutInt64 = 5000
  170. }
  171. timeout := time.Duration(timeoutInt64) * time.Millisecond
  172. if tabTitle == "" && tabUrl == "" {
  173. ctx, _ = context.WithTimeout(b.Ctx, timeout)
  174. return ctx, nil
  175. } else {
  176. ts, err := chromedp.Targets(b.Ctx)
  177. if err != nil {
  178. return nil, err
  179. }
  180. for _, t := range ts {
  181. if (tabTitle != "" && strings.Contains(t.Title, tabTitle)) || (tabUrl != "" && strings.Contains(t.URL, tabUrl)) {
  182. // log.Printf("find tab param<title,url>: %s %s found %s %s", tabTitle, tabUrl,
  183. // t.Title, t.URL)
  184. newCtx, _ := chromedp.NewContext(b.Ctx, chromedp.WithTargetID(t.TargetID))
  185. ctx, _ = context.WithTimeout(newCtx, timeout)
  186. return ctx, nil
  187. }
  188. }
  189. }
  190. return nil, errors.New("can't find tab")
  191. }
  192. return nil, errors.New("context is error")
  193. }
  194. // CloseTabs 关闭页面
  195. func (b *GLBrowser) CloseTabs(tabTitle, tabUrl string, timeoutInt64 int64) (err error) {
  196. if timeoutInt64 == 0 {
  197. timeoutInt64 = 5
  198. }
  199. timeout := time.Duration(timeoutInt64) * time.Millisecond
  200. ts, err := chromedp.Targets(b.Ctx)
  201. if err != nil {
  202. return err
  203. }
  204. for _, t := range ts {
  205. if (tabTitle != "" && strings.Contains(t.Title, tabTitle)) || (tabUrl != "" && strings.Contains(t.URL, tabUrl)) {
  206. newCtx, _ := chromedp.NewContext(b.Ctx, chromedp.WithTargetID(t.TargetID))
  207. ctx, _ := context.WithTimeout(newCtx, timeout)
  208. chromedp.Run(
  209. ctx,
  210. page.Close(),
  211. )
  212. }
  213. }
  214. return nil
  215. }
  216. // Navigate 导航到指定网址
  217. func (b *GLBrowser) Navigate(tabTitle string, tabUrl string, isNewTab bool, targetUrl string, timeout int64) (err error) {
  218. ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
  219. if err != nil {
  220. return err
  221. }
  222. //新标签页
  223. if isNewTab {
  224. ctx, _ = chromedp.NewContext(ctx)
  225. }
  226. //
  227. return chromedp.Run(ctx,
  228. chromedp.Navigate(targetUrl))
  229. }
  230. // Navigate 导航到指定网址,并保存请求资源,如图片等
  231. func (b *GLBrowser) NavigateAndSaveRes(tabTitle string, tabUrl string, timeout int64, isNewTab bool, targetUrl string, saveFileTypeList, save2dir string) (err error) {
  232. ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
  233. if err != nil {
  234. return err
  235. }
  236. //新标签页
  237. if isNewTab {
  238. ctx, _ = chromedp.NewContext(ctx)
  239. }
  240. //
  241. saveFileType := strings.Split(saveFileTypeList, " ")
  242. isNeedRes := func(fileType string) bool {
  243. for _, v := range saveFileType {
  244. if strings.Contains(fileType, v) {
  245. return true
  246. }
  247. }
  248. return false
  249. }
  250. fnURL2FileName := func(requestURL string) string {
  251. u, err := url.Parse(requestURL)
  252. if err != nil {
  253. return ""
  254. }
  255. _, filename := filepath.Split(u.Path)
  256. return filename
  257. }
  258. var cache = map[network.RequestID]string{}
  259. chromedp.ListenTarget(ctx, func(v interface{}) {
  260. switch ev := v.(type) {
  261. case *network.EventRequestWillBeSent: //准备下载
  262. cache[ev.RequestID] = ev.Request.URL
  263. case *network.EventResponseReceived: //检查回应头的contenttype
  264. contentType, _ := ev.Response.Headers["Content-Type"].(string)
  265. fmt.Println(contentType)
  266. if !isNeedRes(contentType) {
  267. delete(cache, ev.RequestID)
  268. }
  269. case *network.EventLoadingFinished: //下载完成
  270. if uri, ok := cache[ev.RequestID]; ok {
  271. filename := fnURL2FileName(uri)
  272. fmt.Println("save2file", filename)
  273. if filename != "" {
  274. filePath := filepath.Join(save2dir, filename)
  275. var buf []byte
  276. if err := chromedp.Run(ctx, chromedp.ActionFunc(func(ctx context.Context) error {
  277. var err error
  278. buf, err = network.GetResponseBody(ev.RequestID).Do(ctx)
  279. return err
  280. })); err == nil {
  281. os.WriteFile(filePath, buf, 0777)
  282. } else {
  283. fmt.Println(err.Error())
  284. }
  285. }
  286. }
  287. }
  288. })
  289. //
  290. err = chromedp.Run(ctx,
  291. chromedp.Navigate(targetUrl))
  292. //下载存储
  293. return err
  294. }
  295. // ExecuteJS 执行脚本
  296. func (b *GLBrowser) ExecuteJS(tabTitle, tabUrl, script string, ret interface{}, timeout int64) (err error) {
  297. ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
  298. if err != nil {
  299. return err
  300. }
  301. return chromedp.Run(ctx,
  302. chromedp.Evaluate(script, ret))
  303. }
  304. // Click 点击
  305. func (b *GLBrowser) Click(tabTitle, tabUrl, selector string, selectorType int, timeout int64) (err error) {
  306. ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
  307. if err != nil {
  308. return err
  309. }
  310. var act chromedp.QueryAction
  311. switch selectorType {
  312. case selector_type_id:
  313. act = chromedp.Click(selector, chromedp.ByID)
  314. case selector_type_query:
  315. act = chromedp.Click(selector, chromedp.ByQuery)
  316. case selector_type_search:
  317. act = chromedp.Click(selector, chromedp.BySearch)
  318. case selector_type_jspath:
  319. act = chromedp.Click(selector, chromedp.ByJSPath)
  320. default:
  321. act = chromedp.Click(selector, chromedp.ByQueryAll)
  322. }
  323. err = chromedp.Run(ctx,
  324. act)
  325. return err
  326. }
  327. // KeySend 键盘输入
  328. func (b *GLBrowser) KeySend(tabTitle, tabUrl, selector, sendStr string, selectorType int, timeout int64) (err error) {
  329. ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
  330. if err != nil {
  331. return err
  332. }
  333. var act chromedp.QueryAction
  334. switch selectorType {
  335. case selector_type_id:
  336. act = chromedp.SendKeys(selector, sendStr, chromedp.ByID)
  337. case selector_type_query:
  338. act = chromedp.SendKeys(selector, sendStr, chromedp.ByQuery)
  339. case selector_type_search:
  340. act = chromedp.SendKeys(selector, sendStr, chromedp.BySearch)
  341. case selector_type_jspath:
  342. act = chromedp.SendKeys(selector, sendStr, chromedp.ByJSPath)
  343. default:
  344. act = chromedp.SendKeys(selector, sendStr, chromedp.ByQueryAll)
  345. }
  346. return chromedp.Run(ctx,
  347. act)
  348. }
  349. // WaitVisible 等待元素可见
  350. func (b *GLBrowser) WaitVisible(tabTitle, tabUrl, selector string, selectorType int, timeout int64) error {
  351. ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
  352. if err != nil {
  353. return err
  354. }
  355. var act chromedp.QueryAction
  356. switch selectorType {
  357. case selector_type_id:
  358. act = chromedp.WaitVisible(selector, chromedp.ByID)
  359. case selector_type_query:
  360. act = chromedp.WaitVisible(selector, chromedp.ByQuery)
  361. case selector_type_search:
  362. act = chromedp.WaitVisible(selector, chromedp.BySearch)
  363. case selector_type_jspath:
  364. act = chromedp.WaitVisible(selector, chromedp.ByJSPath)
  365. default:
  366. act = chromedp.WaitVisible(selector, chromedp.ByQueryAll)
  367. }
  368. return chromedp.Run(ctx,
  369. act)
  370. }
  371. // 重置浏览器
  372. func (b *GLBrowser) Reset() {
  373. }
  374. // DownloadFile 只有在非headless模式下有效,与click方法其实是一致的
  375. func (b *GLBrowser) DownloadFile(tabTitle, tabUrl string, timeout int64, selector string, selectorType int, save2dir string) error {
  376. ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
  377. if err != nil {
  378. return err
  379. }
  380. var act chromedp.QueryAction
  381. switch selectorType {
  382. case selector_type_id:
  383. act = chromedp.Click(selector, chromedp.ByID)
  384. case selector_type_query:
  385. act = chromedp.Click(selector, chromedp.ByQuery)
  386. case selector_type_search:
  387. act = chromedp.Click(selector, chromedp.BySearch)
  388. case selector_type_jspath:
  389. act = chromedp.Click(selector, chromedp.ByJSPath)
  390. default:
  391. act = chromedp.Click(selector, chromedp.ByQueryAll)
  392. }
  393. return chromedp.Run(ctx,
  394. browser.SetDownloadBehavior(browser.SetDownloadBehaviorBehaviorAllowAndName).WithDownloadPath(save2dir).WithEventsEnabled(true),
  395. act)
  396. }
  397. func (b *GLBrowser) AnalyzeCodeByPath(path, stype, head, cookie string, proxy bool) (code string, rh http.Header, rc []*http.Cookie) {
  398. //先用免费,为识别再用收费
  399. ok := false
  400. code, rh, rc, _, ok = getCodeByFree(path, stype, head, cookie, proxy) //自己的服务
  401. qu.Debug("Get Code By Free Result:", path, ok, code)
  402. if qu.IntAll(stype) > 0 && !ok {
  403. code, rh, rc = getCodeByPay(path, stype, head, cookie, proxy) //超级鹰收费
  404. }
  405. return
  406. }
  407. func getCodeByFree(path, stype, head, cookie string, proxy bool) (code string, respheader http.Header, respcookie []*http.Cookie, getCodeResp *req.Response, ok bool) {
  408. defer qu.Catch()
  409. client := req.C().
  410. SetTimeout(time.Duration(be.Cfg.ServerCodeTimeOut) * time.Second).
  411. SetTLSClientConfig(&tls.Config{
  412. Renegotiation: tls.RenegotiateOnceAsClient,
  413. InsecureSkipVerify: true,
  414. }) //忽略证书验证
  415. headers := map[string]string{}
  416. if head != "" {
  417. json.Unmarshal([]byte(head), &headers)
  418. }
  419. cookies := []*http.Cookie{}
  420. if cookie != "" {
  421. json.Unmarshal([]byte(cookie), &cookies)
  422. }
  423. for times := 1; times <= 6; times++ { //重试三次
  424. if times > 2 || proxy { //重试第4次开始,使用代理ip
  425. if stype == "-1" {
  426. return
  427. }
  428. proxyIp := be.GetProxyAddr() //获取代理地址
  429. qu.Debug("proxy:", path, proxyIp)
  430. client.SetProxyURL(proxyIp) //设置代理IP
  431. }
  432. request := client.R()
  433. if len(headers) > 0 {
  434. request.SetHeaders(headers)
  435. }
  436. if len(cookies) > 0 {
  437. request.SetCookies(cookies...)
  438. }
  439. //下载验证码图片
  440. var err error
  441. var resultByte []byte
  442. //address := be.Cfg.ServerCodeFreeAddressOcr
  443. if stype == "-1" { //传base64的图片
  444. resultByte = []byte(path)
  445. } else {
  446. if stype == "6001" { //计算类验证码解析接口地址
  447. //address = be.Cfg.ServerCodeFreeAddressArithmetic
  448. }
  449. getCodeResp, err = request.Get(path) //通过请求图片地址返回的byte
  450. resultByte = getCodeResp.Bytes()
  451. }
  452. if err != nil {
  453. qu.Debug("Get Code By Path Error: ", path, err)
  454. continue
  455. }
  456. code, err = getCode(resultByte, stype, true)
  457. if err == nil && code != "" {
  458. if getCodeResp != nil {
  459. respheader = getCodeResp.Header
  460. respcookie = getCodeResp.Cookies()
  461. }
  462. ok = true
  463. return
  464. }
  465. //解析验证码
  466. //codeResp, err := client.R().
  467. // SetHeader("accept", "application/json").
  468. // SetFileReader("file", "1", bytes.NewReader(resultByte)).
  469. // Post(address)
  470. //if err != nil {
  471. // qu.Debug("analysis code by path err: ", path, err)
  472. // continue
  473. //}
  474. //yzmResult := map[string]interface{}{}
  475. //json.Unmarshal(codeResp.Bytes(), &yzmResult)
  476. //qu.Debug(path, yzmResult)
  477. //if err != nil || yzmResult == nil {
  478. // continue
  479. //}
  480. //result := yzmResult["r"].(map[string]interface{})
  481. //yzm := fmt.Sprint(result["code"])
  482. //if yzm != "" {
  483. // if stype == "6001" || len(yzm) >= 4 {
  484. // code = yzm //长度小于4的视为识别错误
  485. // if getCodeResp != nil {
  486. // respheader = getCodeResp.Header
  487. // respcookie = getCodeResp.Cookies()
  488. // }
  489. // ok = true
  490. // return
  491. // }
  492. //}
  493. }
  494. return
  495. }
  496. func getCodeByPay(path, stype, head, cookie string, proxy bool) (code string, respheader http.Header, respcookie []*http.Cookie) {
  497. defer qu.Catch()
  498. client := req.C().
  499. SetTimeout(time.Duration(be.Cfg.ServerCodeTimeOut) * time.Second).
  500. SetTLSClientConfig(&tls.Config{
  501. Renegotiation: tls.RenegotiateOnceAsClient,
  502. InsecureSkipVerify: true,
  503. }) //忽略证书验证
  504. headers := map[string]string{}
  505. if head != "" {
  506. json.Unmarshal([]byte(head), &headers)
  507. }
  508. cookies := []*http.Cookie{}
  509. if cookie != "" {
  510. json.Unmarshal([]byte(cookie), &cookies)
  511. }
  512. for times := 1; times <= 2; times++ { //重试三次
  513. //atomic.AddInt64(&PyTimes, 1)
  514. if times > 1 || proxy { //重试第2次开始,使用代理ip
  515. proxyIp := be.GetProxyAddr() //获取代理地址
  516. qu.Debug("proxy:", path, proxyIp)
  517. client.SetProxyURL(proxyIp) //设置代理IP
  518. }
  519. request := client.R()
  520. if len(headers) > 0 {
  521. request.SetHeaders(headers)
  522. }
  523. if len(cookies) > 0 {
  524. request.SetCookies(cookies...)
  525. }
  526. //下载验证码图片
  527. getCodeResp, err := request.Get(path)
  528. //log.Println("respHeader---", getCodeResp.Header)
  529. //log.Println("respCookie---", getCodeResp.Cookies())
  530. if err != nil {
  531. qu.Debug("Get Code By Path Error: ", path, err)
  532. continue
  533. }
  534. code, err = getCode(getCodeResp.Bytes(), stype, false)
  535. if err == nil && code != "" {
  536. respheader = getCodeResp.Header
  537. respcookie = getCodeResp.Cookies()
  538. return
  539. }
  540. //解析验证码
  541. //data := map[string]string{
  542. // "grant_type": "",
  543. // "username": "jianyu001",
  544. // "password": "123qwe!A",
  545. // "scope": "",
  546. // "client_id": "",
  547. // "client_secret ": "",
  548. //}
  549. //codeResp, err := client.R().
  550. // SetHeader("accept", "application/json").
  551. // SetFileReader("file", "1", bytes.NewReader(getCodeResp.Bytes())).
  552. // SetFormData(data).
  553. // Post(be.Cfg.ServerCodeAddress + stype)
  554. //if err != nil {
  555. // qu.Debug("analysis code by path err: ", path, err)
  556. // continue
  557. //}
  558. //codeResult := map[string]interface{}{}
  559. //json.Unmarshal(codeResp.Bytes(), &codeResult)
  560. //qu.Debug("codeResult:", codeResult)
  561. //qu.Debug("codeResult:", result)
  562. //if err != nil || result == nil {
  563. // continue
  564. //}
  565. //if yzm, ok := result["r"].(map[string]interface{})["pic_str"].(string); ok && yzm != "" && len(yzm) >= 4 {
  566. // code = yzm
  567. // respheader = getCodeResp.Header
  568. // respcookie = getCodeResp.Cookies()
  569. // return
  570. //}
  571. }
  572. return
  573. }
  574. func getCode(b []byte, stype string, free bool) (code string, err error) {
  575. qu.Debug("验证码类型:", stype, ",是否免费:", free)
  576. //解析验证码
  577. request := req.C().R().
  578. SetHeader("accept", "application/json").
  579. SetFileReader("file", "1", bytes.NewReader(b))
  580. address := be.Cfg.ServerCodeFreeAddressOcr
  581. if !free {
  582. data := map[string]string{
  583. "grant_type": "",
  584. "username": be.Cfg.Username,
  585. "password": be.Cfg.Password,
  586. "scope": "",
  587. "client_id": "",
  588. "client_secret ": "",
  589. }
  590. request.SetFormData(data)
  591. address = be.Cfg.ServerCodeAddress + stype
  592. } else if stype == "6001" { //计算类验证码解析接口地址
  593. address = be.Cfg.ServerCodeFreeAddressArithmetic
  594. }
  595. qu.Debug("address:", address)
  596. var resp *req.Response
  597. resp, err = request.Post(address)
  598. if err != nil {
  599. qu.Debug("analysis code by path err: ", err)
  600. return
  601. }
  602. var result map[string]interface{}
  603. err = json.Unmarshal(resp.Bytes(), &result)
  604. qu.Debug("验证码解析结果:", free, result)
  605. if err == nil && result != nil {
  606. if free {
  607. r, _ := result["r"].(map[string]interface{})
  608. codeTmp := qu.ObjToString(r["code"])
  609. if len(codeTmp) >= 4 || stype == "6001" && codeTmp != "" {
  610. return codeTmp, nil
  611. }
  612. } else {
  613. if codeTmp, ok := result["r"].(map[string]interface{})["pic_str"].(string); ok && codeTmp != "" {
  614. if stype == "6001" || len(codeTmp) >= 4 {
  615. return codeTmp, nil
  616. }
  617. }
  618. }
  619. }
  620. return
  621. }
  622. // AnalyzeCodeScreenShot 截屏解析验证码
  623. func (b *GLBrowser) AnalyzeCodeScreenShot(tabTitle, tabUrl, selector string, selectorType int, timeout int64, stype string) (code string, err error) {
  624. ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
  625. if err != nil {
  626. return
  627. }
  628. var act chromedp.QueryAction
  629. var bt []byte
  630. switch selectorType {
  631. case selector_type_id:
  632. act = chromedp.Screenshot(selector, &bt, chromedp.ByID)
  633. case selector_type_query:
  634. act = chromedp.Screenshot(selector, &bt, chromedp.ByQuery)
  635. //case selector_type_search:
  636. //case selector_type_jspath:
  637. default:
  638. //option = chromedp.ByQueryAll
  639. chromedp.Screenshot(selector, &bt, chromedp.ByQueryAll)
  640. }
  641. err = chromedp.Run(ctx,
  642. act,
  643. )
  644. //保存
  645. if err = ioutil.WriteFile("code.png", bt, 0755); err != nil {
  646. qu.Debug(err)
  647. }
  648. code, err = getCode(bt, stype, true) //免费
  649. if err != nil || code == "" {
  650. code, err = getCode(bt, stype, false) //收费
  651. }
  652. return
  653. }
  654. // BindLuaState
  655. func (b *GLBrowser) BindLuaState(s *lua.LState, recordId string) {
  656. //执行暂停
  657. s.SetGlobal("browser_sleep", s.NewFunction(func(l *lua.LState) int {
  658. fmt.Println("---browser_sleep---")
  659. timeout := l.ToInt64(-1)
  660. if timeout == 0 {
  661. timeout = 5
  662. }
  663. time.Sleep(time.Duration(timeout) * time.Millisecond)
  664. return 0
  665. }))
  666. //关闭tabl页
  667. s.SetGlobal("browser_closetabs", s.NewFunction(func(l *lua.LState) int {
  668. fmt.Println("---browser_closetabs---")
  669. timeout := l.ToInt64(-3)
  670. tabTitle := l.ToString(-2)
  671. tabUrl := l.ToString(-1)
  672. if timeout == 0 {
  673. timeout = 5
  674. }
  675. b.CloseTabs(tabTitle, tabUrl, timeout)
  676. return 0
  677. }))
  678. //注册打开地址
  679. s.SetGlobal("browser_navagite", s.NewFunction(func(l *lua.LState) int {
  680. fmt.Println("---browser_navagite---")
  681. tabTitle := l.ToString(-5) //指定标签页title
  682. tabUrl := l.ToString(-4) //指定标签页url
  683. isNewTab := l.ToBool(-3) //是否打开新的标签页
  684. timeout := l.ToInt64(-2) //网页打开的超时时间
  685. targetUrl := l.ToString(-1) //打开网页的链接
  686. if err := b.Navigate(tabTitle, tabUrl, isNewTab, targetUrl, timeout); err != nil {
  687. l.Push(lua.LString(err.Error()))
  688. } else {
  689. l.Push(lua.LString("ok"))
  690. }
  691. return 1
  692. }))
  693. //执行浏览器端js
  694. s.SetGlobal("browser_executejs", s.NewFunction(func(l *lua.LState) int {
  695. fmt.Println("---browser_executejs---")
  696. tabTitle := l.ToString(-5)
  697. tabUrl := l.ToString(-4)
  698. timeout := l.ToInt64(-3)
  699. returnType := l.ToInt(-2) //返回数据类型
  700. script := l.ToString(-1) //执行的js
  701. switch returnType {
  702. case execute_return_type_string: //返回string
  703. var ret string
  704. if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout); err == nil {
  705. l.Push(lua.LString("ok"))
  706. l.Push(lua.LString(ret))
  707. } else {
  708. l.Push(lua.LString("err"))
  709. l.Push(lua.LString(err.Error()))
  710. }
  711. case execute_return_type_list: //返回list
  712. var ret = make([]interface{}, 0, 0)
  713. var tmp = make(map[string]interface{})
  714. if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout); err == nil {
  715. for i, v := range ret {
  716. tmp[strconv.Itoa(i)] = v
  717. }
  718. l.Push(lua.LString("ok"))
  719. l.Push(MapToTable(tmp))
  720. } else {
  721. l.Push(lua.LString("err"))
  722. l.Push(lua.LString(err.Error()))
  723. }
  724. case execute_return_type_table: //返回table
  725. var ret = make(map[string]interface{})
  726. if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout); err == nil {
  727. l.Push(lua.LString("ok"))
  728. l.Push(MapToTable(ret))
  729. } else {
  730. l.Push(lua.LString("err"))
  731. l.Push(lua.LString(err.Error()))
  732. }
  733. }
  734. return 2
  735. }))
  736. //按键
  737. s.SetGlobal("browser_keysend", s.NewFunction(func(l *lua.LState) int {
  738. fmt.Println("---browser_keysend---")
  739. tabTitle := l.ToString(-6)
  740. tabUrl := l.ToString(-5)
  741. timeout := l.ToInt64(-4)
  742. words := l.ToString(-3)
  743. selectorType := l.ToInt(-2)
  744. selector := l.ToString(-1)
  745. fmt.Println(selector, words, selectorType, timeout)
  746. err := b.KeySend(tabTitle, tabUrl, selector, words, selectorType, timeout)
  747. if err != nil {
  748. l.Push(lua.LString(err.Error()))
  749. } else {
  750. l.Push(lua.LString("ok"))
  751. }
  752. return 1
  753. }))
  754. //点击
  755. s.SetGlobal("browser_click", s.NewFunction(func(l *lua.LState) int {
  756. fmt.Println("---browser_click---")
  757. tabTitle := l.ToString(-5)
  758. tabUrl := l.ToString(-4)
  759. timeout := l.ToInt64(-3)
  760. selectorType := l.ToInt(-2)
  761. selector := l.ToString(-1)
  762. err := b.Click(tabTitle, tabUrl, selector, selectorType, timeout)
  763. if err != nil {
  764. l.Push(lua.LString(err.Error()))
  765. } else {
  766. l.Push(lua.LString("ok"))
  767. }
  768. return 1
  769. }))
  770. //等待元素加载
  771. s.SetGlobal("browser_waitvisible", s.NewFunction(func(l *lua.LState) int {
  772. fmt.Println("---browser_waitvisible---")
  773. tabTitle := l.ToString(-5)
  774. tabUrl := l.ToString(-4)
  775. timeout := l.ToInt64(-3)
  776. selectorType := l.ToInt(-2) //选择器类型
  777. selector := l.ToString(-1) //选择器
  778. err := b.WaitVisible(tabTitle, tabUrl, selector, selectorType, timeout)
  779. if err != nil {
  780. l.Push(lua.LString(err.Error()))
  781. } else {
  782. l.Push(lua.LString("ok"))
  783. }
  784. return 1
  785. }))
  786. //下载附件
  787. s.SetGlobal("browser_downloadfile", s.NewFunction(func(l *lua.LState) int {
  788. tabTitle := l.ToString(-6)
  789. tabUrl := l.ToString(-5)
  790. timeout := l.ToInt64(-4)
  791. selectorType := l.ToInt(-3)
  792. selector := l.ToString(-2)
  793. save2dir := l.ToString(-1)
  794. err := b.DownloadFile(tabTitle, tabUrl, timeout, selector, selectorType, save2dir)
  795. if err != nil {
  796. l.Push(lua.LString(err.Error()))
  797. } else {
  798. l.Push(lua.LString("ok"))
  799. }
  800. return 1
  801. }))
  802. //注册打开地址
  803. s.SetGlobal("browser_navagite_download_res", s.NewFunction(func(l *lua.LState) int {
  804. tabTitle := l.ToString(-7)
  805. tabUrl := l.ToString(-6)
  806. timeout := l.ToInt64(-5)
  807. isNewTab := l.ToBool(-4)
  808. targetUrl := l.ToString(-3)
  809. saveFileTypeList := l.ToString(-2)
  810. savedir := l.ToString(-1)
  811. if err := b.NavigateAndSaveRes(tabTitle, tabUrl, timeout, isNewTab, targetUrl, saveFileTypeList, savedir); err != nil {
  812. l.Push(lua.LString(err.Error()))
  813. } else {
  814. l.Push(lua.LString("ok"))
  815. }
  816. return 1
  817. }))
  818. //s.SetGlobal("browser_analyzecode_bypath", s.NewFunction(func(S *lua.LState) int {
  819. // proxy := S.ToBool(-5)
  820. // url := S.ToString(-4)
  821. // stype := S.ToString(-3)
  822. // head := S.ToTable(-2)
  823. // cookie := S.ToString(-1)
  824. // headMap := TableToMap(head)
  825. // //qu.Debug("cookie----------", cookie)
  826. // //qu.Debug("headMap----------", headMap)
  827. // headJsonStr := ""
  828. // headByte, err := json.Marshal(headMap)
  829. // if err == nil {
  830. // headJsonStr = string(headByte)
  831. // }
  832. // code, respHead, respCookie := b.AnalyzeCodeByPath(url, stype, headJsonStr, cookie, proxy)
  833. // rhead, _ := json.Marshal(respHead)
  834. // respHeadMap := map[string]interface{}{}
  835. // json.Unmarshal(rhead, &respHeadMap)
  836. // hTable := MapToTable(respHeadMap)
  837. //
  838. // rcookie, _ := json.Marshal(respCookie)
  839. // respCookieMap := []map[string]interface{}{}
  840. // json.Unmarshal(rcookie, &respCookieMap)
  841. // cTable := MapToTable(map[string]interface{}{"cookie": respCookieMap})
  842. // S.Push(lua.LString(code))
  843. // S.Push(hTable)
  844. // S.Push(cTable.RawGetString("cookie"))
  845. // return 3
  846. //}))
  847. //发布时间格式化
  848. s.SetGlobal("browser_publishtime", s.NewFunction(func(l *lua.LState) int {
  849. text := l.ToString(-1)
  850. publishtime := getPublitime(text)
  851. l.Push(lua.LString(publishtime))
  852. return 1
  853. }))
  854. //截屏功能
  855. s.SetGlobal("browser_analyzecode_screenshot", s.NewFunction(func(l *lua.LState) int {
  856. tabTitle := l.ToString(-6)
  857. tabUrl := l.ToString(-5)
  858. stype := l.ToString(-4)
  859. timeout := l.ToInt64(-3)
  860. selectorType := l.ToInt(-2)
  861. selector := l.ToString(-1)
  862. code, _ := b.AnalyzeCodeScreenShot(tabTitle, tabUrl, selector, selectorType, timeout, stype)
  863. l.Push(lua.LString(code))
  864. return 1
  865. }))
  866. //保存数据
  867. s.SetGlobal("browser_savedata", s.NewFunction(func(l *lua.LState) int {
  868. //fmt.Println("---browser_savedata---")
  869. pageType := l.ToString(-2)
  870. data := l.ToTable(-1)
  871. result := TableToMap(data)
  872. if pageType == "list" {
  873. result["recordid"] = recordId
  874. }
  875. DataCache <- result
  876. return 0
  877. }))
  878. //获取数据
  879. s.SetGlobal("browser_getdata", s.NewFunction(func(l *lua.LState) int {
  880. fmt.Println("---browser_getdata---")
  881. num := l.ToInt(-1) //获取多少条数据
  882. count := len(Datas)
  883. if count == 0 {
  884. l.Push(lua.LString("err"))
  885. l.Push(lua.LString("当前可下载量为0"))
  886. } else {
  887. if count < num {
  888. num = count
  889. }
  890. data := Datas[:num]
  891. Datas = Datas[num:]
  892. tMap := MapToTable(map[string]interface{}{"data": data})
  893. l.Push(lua.LString("ok"))
  894. l.Push(tMap.RawGetString("data"))
  895. }
  896. return 2
  897. }))
  898. }