action.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. /**
  2. * 浏览器行为封装
  3. * 基础动作
  4. */
  5. package browser
  6. import (
  7. "context"
  8. "errors"
  9. "log"
  10. // "log"
  11. sp_util "KeyWebsiteMonitor/spider/util"
  12. util "KeyWebsiteMonitor/util"
  13. "regexp"
  14. "strconv"
  15. "strings"
  16. "time"
  17. "github.com/chromedp/chromedp"
  18. "github.com/yuin/gopher-lua"
  19. )
  20. const (
  21. selector_type_id = 0
  22. selector_type_query = 1
  23. selector_type_search = 2
  24. selector_type_jspath = 3
  25. selector_type_query_all = 4
  26. execute_return_type_string = 0
  27. execute_return_type_list = 1
  28. execute_return_type_table = 2
  29. )
  30. // findTab 根据标题、url找tab
  31. func (b *Browser) findTabContext(tabTitle, tabUrl string, timeoutInt64 int64) (ctx context.Context, cancelFn context.CancelFunc, err error) {
  32. if timeoutInt64 == 0 {
  33. timeoutInt64 = 5000
  34. }
  35. timeout := time.Duration(timeoutInt64) * time.Millisecond
  36. if tabTitle == "*" && tabUrl == "*" {
  37. return b.Ctx, b.CancelFn, nil
  38. } else if tabTitle == "" && tabUrl == "" {
  39. ctx, cancelFn = context.WithTimeout(b.Ctx, timeout)
  40. return ctx, cancelFn, nil
  41. } else {
  42. ts, err := chromedp.Targets(b.Ctx)
  43. if err != nil {
  44. return nil, nil, err
  45. }
  46. for _, t := range ts {
  47. if (tabTitle != "" && strings.Contains(t.Title, tabTitle)) ||
  48. (tabUrl != "" && strings.Contains(t.URL, tabUrl)) {
  49. // log.Printf("find tab param<title,url>: %s %s found %s %s", tabTitle, tabUrl,
  50. // t.Title, t.URL)
  51. newCtx, _ := chromedp.NewContext(b.Ctx, chromedp.WithTargetID(t.TargetID))
  52. ctx, cancelFn = context.WithTimeout(newCtx, timeout)
  53. return ctx, cancelFn, nil
  54. }
  55. }
  56. }
  57. return nil, nil, errors.New("can't find tab")
  58. }
  59. // BindLuaState
  60. func (b *Browser) BindLuaState(state *lua.LState) {
  61. //执行暂停
  62. state.SetGlobal("browser_sleep", state.NewFunction(func(l *lua.LState) int {
  63. timeout := l.ToInt64(-1)
  64. if timeout == 0 {
  65. timeout = 1000
  66. }
  67. time.Sleep(time.Duration(timeout) * time.Millisecond)
  68. return 0
  69. }))
  70. //关闭tabl页
  71. state.SetGlobal("browser_closetabs", state.NewFunction(func(l *lua.LState) int {
  72. tabTitle := l.ToString(-3)
  73. tabUrl := l.ToString(-2)
  74. timeout := l.ToInt64(-1)
  75. if timeout == 0 {
  76. timeout = 5
  77. }
  78. b.CloseTabs(tabTitle, tabUrl, timeout)
  79. return 0
  80. }))
  81. //注册打开地址
  82. state.SetGlobal("browser_navagite", state.NewFunction(func(l *lua.LState) int {
  83. tabTitle := l.ToString(-5)
  84. tabUrl := l.ToString(-4)
  85. isNewTab := l.ToBool(-3)
  86. timeout := l.ToInt64(-2)
  87. targetUrl := l.ToString(-1)
  88. if err := b.Navigate(tabTitle, tabUrl, isNewTab, targetUrl, timeout); err != nil {
  89. l.Push(lua.LString(err.Error()))
  90. } else {
  91. l.Push(lua.LString("ok"))
  92. }
  93. return 1
  94. }))
  95. //注册打开地址
  96. state.SetGlobal("browser_navagite_with_abortflag", state.NewFunction(func(l *lua.LState) int {
  97. tabTitle := l.ToString(-6)
  98. tabUrl := l.ToString(-5)
  99. isNewTab := l.ToBool(-4)
  100. timeout := l.ToInt64(-3)
  101. targetUrl := l.ToString(-2)
  102. abortFlags := l.ToString(-1)
  103. if err := b.NavigateWithAbortFlags(tabTitle, tabUrl, isNewTab, targetUrl, abortFlags, timeout); err != nil {
  104. l.Push(lua.LString(err.Error()))
  105. } else {
  106. l.Push(lua.LString("ok"))
  107. }
  108. return 1
  109. }))
  110. //执行浏览器端js
  111. state.SetGlobal("browser_executejs", state.NewFunction(func(l *lua.LState) int {
  112. needSleep := l.ToBool(-6)
  113. tabTitle := l.ToString(-5)
  114. tabUrl := l.ToString(-4)
  115. timeout := l.ToInt64(-3)
  116. returnType := l.ToInt(-2)
  117. script := l.ToString(-1)
  118. switch returnType {
  119. case execute_return_type_string: //返回string
  120. var ret string
  121. if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout, needSleep); err == nil {
  122. l.Push(lua.LString("ok"))
  123. l.Push(lua.LString(ret))
  124. } else {
  125. l.Push(lua.LString("err"))
  126. l.Push(lua.LString(err.Error()))
  127. }
  128. case execute_return_type_list: //返回list
  129. var ret = make([]interface{}, 0, 0)
  130. var tmp = make(map[string]interface{})
  131. if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout, needSleep); err == nil {
  132. for i, v := range ret {
  133. tmp[strconv.Itoa(i)] = v
  134. }
  135. l.Push(lua.LString("ok"))
  136. l.Push(sp_util.MapToTable(tmp))
  137. } else {
  138. l.Push(lua.LString("err"))
  139. l.Push(lua.LString(err.Error()))
  140. }
  141. case execute_return_type_table: //返回table
  142. var ret = make(map[string]interface{})
  143. if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout, needSleep); err == nil {
  144. // log.Println("ret:", ret)
  145. l.Push(lua.LString("ok"))
  146. l.Push(sp_util.MapToTable(ret))
  147. } else {
  148. log.Println(tabTitle, tabUrl, "EEEEERRRR", err.Error())
  149. l.Push(lua.LString("err"))
  150. l.Push(lua.LString(err.Error()))
  151. }
  152. }
  153. return 2
  154. }))
  155. //按键
  156. state.SetGlobal("browser_keysend", state.NewFunction(func(l *lua.LState) int {
  157. tabTitle := l.ToString(-6)
  158. tabUrl := l.ToString(-5)
  159. timeout := l.ToInt64(-4)
  160. selectorType := l.ToInt(-3)
  161. selector := l.ToString(-2)
  162. words := l.ToString(-1)
  163. err := b.KeySend(tabTitle, tabUrl, selector, words, selectorType, timeout)
  164. if err != nil {
  165. l.Push(lua.LString(err.Error()))
  166. } else {
  167. l.Push(lua.LString("ok"))
  168. }
  169. return 1
  170. }))
  171. //点击
  172. state.SetGlobal("browser_click", state.NewFunction(func(l *lua.LState) int {
  173. tabTitle := l.ToString(-5)
  174. tabUrl := l.ToString(-4)
  175. timeout := l.ToInt64(-3)
  176. selectorType := l.ToInt(-2)
  177. selector := l.ToString(-1)
  178. err := b.Click(tabTitle, tabUrl, selector, selectorType, timeout)
  179. if err != nil {
  180. l.Push(lua.LString(err.Error()))
  181. } else {
  182. l.Push(lua.LString("ok"))
  183. }
  184. return 1
  185. }))
  186. //browser_history_back
  187. state.SetGlobal("browser_history_back", state.NewFunction(func(l *lua.LState) int {
  188. tabTitle := l.ToString(-3)
  189. tabUrl := l.ToString(-2)
  190. timeout := l.ToInt64(-1)
  191. err := b.GoHistoryBack(tabTitle, tabUrl, timeout)
  192. if err != nil {
  193. l.Push(lua.LString(err.Error()))
  194. } else {
  195. l.Push(lua.LString("ok"))
  196. }
  197. return 1
  198. }))
  199. //state.SetGlobal("browser_wait_visible", state.NewFunction(func(l *lua.LState) int {
  200. state.SetGlobal("browser_wait_ready", state.NewFunction(func(l *lua.LState) int {
  201. tabTitle := l.ToString(-5)
  202. tabUrl := l.ToString(-4)
  203. timeout := l.ToInt64(-3)
  204. selectorType := l.ToInt(-2)
  205. selector := l.ToString(-1)
  206. err := b.WaitVisible(tabTitle, tabUrl, selector, selectorType, timeout)
  207. if err != nil {
  208. l.Push(lua.LString(err.Error()))
  209. } else {
  210. l.Push(lua.LString("ok"))
  211. }
  212. return 1
  213. }))
  214. //点击
  215. state.SetGlobal("browser_downloadfile", state.NewFunction(func(l *lua.LState) int {
  216. tabTitle := l.ToString(-7)
  217. tabUrl := l.ToString(-6)
  218. timeout := l.ToInt64(-5)
  219. selectorType := l.ToInt(-4)
  220. selector := l.ToString(-3)
  221. filename := l.ToString(-2)
  222. save2dir := l.ToString(-1)
  223. err := b.DownloadFile(tabTitle, tabUrl, timeout, selector, selectorType, filename, save2dir)
  224. if err != nil {
  225. l.Push(lua.LString(err.Error()))
  226. } else {
  227. l.Push(lua.LString("ok"))
  228. }
  229. return 1
  230. }))
  231. //关闭tabl页
  232. state.SetGlobal("browser_closetabs_without", state.NewFunction(func(l *lua.LState) int {
  233. tabTitle := l.ToString(-3)
  234. tabUrl := l.ToString(-2)
  235. timeout := l.ToInt64(-1)
  236. if timeout == 0 {
  237. timeout = 5
  238. }
  239. b.CloseTabsWithout(tabTitle, tabUrl, timeout)
  240. return 0
  241. }))
  242. //browser_screenshot 网页局部截图
  243. state.SetGlobal("browser_screenshot", state.NewFunction(func(l *lua.LState) int {
  244. tabTitle := l.ToString(-6)
  245. tabUrl := l.ToString(-5)
  246. timeout := l.ToInt64(-4)
  247. selectorType := l.ToInt(-3)
  248. selector := l.ToString(-2)
  249. filename := l.ToString(-1)
  250. if timeout == 0 {
  251. timeout = 5
  252. }
  253. if err := b.Screenshot(tabTitle, tabUrl, timeout, selectorType, selector, filename); err != nil {
  254. l.Push(lua.LString(err.Error()))
  255. } else {
  256. l.Push(lua.LString("ok"))
  257. }
  258. return 1
  259. }))
  260. //browser_print2pdf 整个网页生成pdf
  261. state.SetGlobal("browser_print2pdf", state.NewFunction(func(l *lua.LState) int {
  262. tabTitle := l.ToString(-4)
  263. tabUrl := l.ToString(-3)
  264. timeout := l.ToInt64(-2)
  265. filename := l.ToString(-1)
  266. if timeout == 0 {
  267. timeout = 5
  268. }
  269. if err := b.PrintToPDF(tabTitle, tabUrl, timeout, filename); err != nil {
  270. l.Push(lua.LString(err.Error()))
  271. } else {
  272. l.Push(lua.LString("ok"))
  273. }
  274. return 1
  275. }))
  276. state.SetGlobal("browser_tabs", state.NewFunction(func(l *lua.LState) int {
  277. tabTitle := l.ToString(-3)
  278. tabUrl := l.ToString(-2)
  279. timeout := l.ToInt64(-1)
  280. if timeout == 0 {
  281. timeout = 500
  282. }
  283. var tmp = make(map[string]interface{})
  284. tabs, err := b.GetBrowserTabs(tabTitle, tabUrl, timeout)
  285. if err == nil {
  286. for i, v := range tabs {
  287. tmp[strconv.Itoa(i)] = v
  288. }
  289. l.Push(lua.LString("ok"))
  290. l.Push(sp_util.MapToTable(tmp))
  291. } else {
  292. l.Push(lua.LString("err"))
  293. l.Push(lua.LString(err.Error()))
  294. }
  295. return 2
  296. }))
  297. state.SetGlobal("browser_send_img_chatbot", state.NewFunction(func(l *lua.LState) int {
  298. mentioned := l.ToString(-3)
  299. uri := l.ToString(-2)
  300. img := l.ToString(-1)
  301. err := SendImage2ChatBot(uri, img, mentioned)
  302. if err != nil {
  303. l.Push(lua.LString("err"))
  304. l.Push(lua.LString(err.Error()))
  305. } else {
  306. l.Push(lua.LString("ok"))
  307. l.Push(lua.LString("ok"))
  308. }
  309. return 2
  310. }))
  311. // 编辑器原有方法
  312. state.SetGlobal("findContentText", state.NewFunction(func(S *lua.LState) int {
  313. gpath := S.ToString(-2)
  314. content := S.ToString(-1)
  315. ret := util.FindContentText(gpath, content)
  316. S.Push(ret)
  317. return 1
  318. }))
  319. state.SetGlobal("findOneText", state.NewFunction(func(S *lua.LState) int {
  320. nodetype := S.ToString(-3)
  321. gpath := S.ToString(-2)
  322. content := S.ToString(-1)
  323. ret := util.FindOneText(gpath, content, nodetype)
  324. S.Push(ret)
  325. return 1
  326. }))
  327. state.SetGlobal("findOneHtml", state.NewFunction(func(S *lua.LState) int {
  328. nodetype := S.ToString(-3)
  329. gpath := S.ToString(-2)
  330. content := S.ToString(-1)
  331. ret := util.FindOneHtml(gpath, content, nodetype)
  332. S.Push(ret)
  333. return 1
  334. }))
  335. state.SetGlobal("findListText", state.NewFunction(func(S *lua.LState) int {
  336. gpath := S.ToString(-2)
  337. content := S.ToString(-1)
  338. ret := state.NewTable()
  339. util.FindListText(gpath, content, ret)
  340. S.Push(ret)
  341. return 1
  342. }))
  343. state.SetGlobal("findListHtml", state.NewFunction(func(S *lua.LState) int {
  344. gpath := S.ToString(-2)
  345. content := S.ToString(-1)
  346. ret := state.NewTable()
  347. util.FindListHtml(gpath, content, ret)
  348. S.Push(ret)
  349. return 1
  350. }))
  351. state.SetGlobal("findMap", state.NewFunction(func(S *lua.LState) int {
  352. qmap := S.ToTable(-2)
  353. content := S.ToString(-1)
  354. ret := state.NewTable()
  355. util.FindMap(qmap, content, ret)
  356. S.Push(ret)
  357. return 1
  358. }))
  359. //手工延时
  360. state.SetGlobal("timeSleep", state.NewFunction(func(S *lua.LState) int {
  361. log.Println("开始睡眠。")
  362. timeout := S.ToInt64(-1)
  363. if timeout == 0 {
  364. timeout = 5000
  365. }
  366. time.Sleep(time.Duration(timeout) * time.Millisecond)
  367. return 0
  368. }))
  369. //支持正则
  370. state.SetGlobal("regexp", state.NewFunction(func(S *lua.LState) int {
  371. index := int(S.ToNumber(-1))
  372. regstr := S.ToString(-2)
  373. text := S.ToString(-3)
  374. reg := regexp.MustCompile(regstr)
  375. reps := reg.FindAllStringSubmatchIndex(text, -1)
  376. ret := state.NewTable()
  377. number := 0
  378. for _, v := range reps {
  379. number++
  380. ret.Insert(number, lua.LString(text[v[index]:v[index+1]]))
  381. }
  382. S.Push(ret)
  383. return 1
  384. }))
  385. //支持替换
  386. state.SetGlobal("replace", state.NewFunction(func(S *lua.LState) int {
  387. text := S.ToString(-3)
  388. old := S.ToString(-2)
  389. repl := S.ToString(-1)
  390. text = strings.Replace(text, old, repl, -1)
  391. S.Push(lua.LString(text))
  392. return 1
  393. }))
  394. //detail过滤
  395. state.SetGlobal("filterdetail", state.NewFunction(func(S *lua.LState) int {
  396. /*
  397. 1.长度判断 (特殊处理:详情请访问原网页!;详见原网页;见原网页;无;无相关内容;无正文内容)
  398. 2.是否含汉字
  399. */
  400. reg1 := regexp.MustCompile("(原网页|无|无相关内容|无正文内容|详见附件|见附件)")
  401. reg2 := regexp.MustCompile("[\u4e00-\u9fa5]")
  402. detail := S.ToString(-1)
  403. if reg1.MatchString(detail) {
  404. S.Push(lua.LBool(true))
  405. return 1
  406. }
  407. if len([]rune(detail)) < 50 || !reg2.MatchString(detail) {
  408. S.Push(lua.LBool(false))
  409. return 1
  410. }
  411. S.Push(lua.LBool(false))
  412. return 1
  413. }))
  414. //匹配汉字
  415. state.SetGlobal("matchan", state.NewFunction(func(S *lua.LState) int {
  416. reg1 := regexp.MustCompile("(见附件|详见附件)")
  417. reg2 := regexp.MustCompile("[\u4e00-\u9fa5]")
  418. detail := S.ToString(-1)
  419. detail = reg1.ReplaceAllString(detail, "")
  420. ok := reg2.MatchString(detail)
  421. S.Push(lua.LBool(ok))
  422. return 1
  423. }))
  424. //匹配
  425. state.SetGlobal("stringFind", state.NewFunction(func(S *lua.LState) int {
  426. regstr := S.ToString(-1)
  427. text := S.ToString(-2)
  428. reg := regexp.MustCompile(regstr)
  429. result := reg.FindString(text)
  430. isMatch := false
  431. if result != "" {
  432. isMatch = true
  433. }
  434. S.Push(lua.LString(result))
  435. S.Push(lua.LBool(isMatch))
  436. return 2
  437. }))
  438. //截取
  439. state.SetGlobal("stringSub", state.NewFunction(func(S *lua.LState) int {
  440. text := S.ToString(-3)
  441. start := S.ToInt(-2)
  442. end := S.ToInt(-1)
  443. result := ""
  444. if len(text) > 0 {
  445. textRune := []rune(text)
  446. textLen := len(textRune)
  447. if end < 0 {
  448. if start > 0 { //正向截取到倒数第end位
  449. result = string(textRune[start-1 : textLen+1+end])
  450. } else if start < 0 { //反向截取 从倒数第start位截取到倒数第end位
  451. result = string(textRune[textLen+start : textLen+1+end])
  452. }
  453. } else if start > 0 && end >= start && end <= textLen { //从第start个截取到第end个
  454. result = string(textRune[start-1 : end])
  455. }
  456. // if end == -1 {
  457. // if start >= 1 { //正向截取到结尾
  458. // result = string(textRune[start-1:])
  459. // } else if start < 0 && textLen+start >= 0 { //反向截取后缀
  460. // result = string(textRune[textLen+start:])
  461. // }
  462. // } else if start >= 1 && end <= textLen { //从第start个截取到第end个
  463. // result = string(textRune[start-1 : end])
  464. // }
  465. }
  466. S.Push(lua.LString(result))
  467. return 1
  468. }))
  469. //长度
  470. state.SetGlobal("stringLen", state.NewFunction(func(S *lua.LState) int {
  471. text := S.ToString(-1)
  472. textLen := len([]rune(text))
  473. S.Push(lua.LNumber(textLen))
  474. return 1
  475. }))
  476. //去除特殊标签中间内容
  477. state.SetGlobal("getPureContent", state.NewFunction(func(S *lua.LState) int {
  478. con := S.ToString(-1)
  479. reg := regexp.MustCompile("(?s)<(!%-%-|!--|style).*?(%-%-|--|style)>") //注释 css
  480. con = reg.ReplaceAllString(con, "")
  481. // indexArr := reg.FindAllStringIndex(con, -1)
  482. // for i := len(indexArr) - 1; i >= 0; i-- {
  483. // if index := indexArr[i]; len(index) == 2 {
  484. // con = con[:index[0]] + con[index[1]:]
  485. // }
  486. // }
  487. S.Push(lua.LString(con))
  488. return 1
  489. }))
  490. }