123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513 |
- /**
- * 浏览器行为封装
- * 基础动作
- */
- package browser
- import (
- "context"
- "errors"
- "log"
- // "log"
- sp_util "KeyWebsiteMonitor/spider/util"
- util "KeyWebsiteMonitor/util"
- "regexp"
- "strconv"
- "strings"
- "time"
- "github.com/chromedp/chromedp"
- "github.com/yuin/gopher-lua"
- )
// Selector kind codes. The numeric values are fixed (0..4) and must not be
// reordered.
// NOTE(review): presumably these are the selectorType integers that Lua
// scripts pass to the selector-based browser helpers — confirm against the
// methods that consume them.
const (
	selector_type_id        = iota // 0
	selector_type_query            // 1
	selector_type_search           // 2
	selector_type_jspath           // 3
	selector_type_query_all        // 4
)

// Return-shape codes understood by the browser_executejs Lua helper. The
// numeric values are fixed (0..2) and must not be reordered.
const (
	execute_return_type_string = iota // 0: result decoded into a string
	execute_return_type_list          // 1: result decoded into a list
	execute_return_type_table         // 2: result decoded into a string-keyed map
)
- // findTab 根据标题、url找tab
- func (b *Browser) findTabContext(tabTitle, tabUrl string, timeoutInt64 int64) (ctx context.Context, cancelFn context.CancelFunc, err error) {
- if timeoutInt64 == 0 {
- timeoutInt64 = 5000
- }
- timeout := time.Duration(timeoutInt64) * time.Millisecond
- if tabTitle == "*" && tabUrl == "*" {
- return b.Ctx, b.CancelFn, nil
- } else if tabTitle == "" && tabUrl == "" {
- ctx, cancelFn = context.WithTimeout(b.Ctx, timeout)
- return ctx, cancelFn, nil
- } else {
- ts, err := chromedp.Targets(b.Ctx)
- if err != nil {
- return nil, nil, err
- }
- for _, t := range ts {
- if (tabTitle != "" && strings.Contains(t.Title, tabTitle)) ||
- (tabUrl != "" && strings.Contains(t.URL, tabUrl)) {
- // log.Printf("find tab param<title,url>: %s %s found %s %s", tabTitle, tabUrl,
- // t.Title, t.URL)
- newCtx, _ := chromedp.NewContext(b.Ctx, chromedp.WithTargetID(t.TargetID))
- ctx, cancelFn = context.WithTimeout(newCtx, timeout)
- return ctx, cancelFn, nil
- }
- }
- }
- return nil, nil, errors.New("can't find tab")
- }
- // BindLuaState
- func (b *Browser) BindLuaState(state *lua.LState) {
- //执行暂停
- state.SetGlobal("browser_sleep", state.NewFunction(func(l *lua.LState) int {
- timeout := l.ToInt64(-1)
- if timeout == 0 {
- timeout = 1000
- }
- time.Sleep(time.Duration(timeout) * time.Millisecond)
- return 0
- }))
- //关闭tabl页
- state.SetGlobal("browser_closetabs", state.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-3)
- tabUrl := l.ToString(-2)
- timeout := l.ToInt64(-1)
- if timeout == 0 {
- timeout = 5
- }
- b.CloseTabs(tabTitle, tabUrl, timeout)
- return 0
- }))
- //注册打开地址
- state.SetGlobal("browser_navagite", state.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-5)
- tabUrl := l.ToString(-4)
- isNewTab := l.ToBool(-3)
- timeout := l.ToInt64(-2)
- targetUrl := l.ToString(-1)
- if err := b.Navigate(tabTitle, tabUrl, isNewTab, targetUrl, timeout); err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- //注册打开地址
- state.SetGlobal("browser_navagite_with_abortflag", state.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-6)
- tabUrl := l.ToString(-5)
- isNewTab := l.ToBool(-4)
- timeout := l.ToInt64(-3)
- targetUrl := l.ToString(-2)
- abortFlags := l.ToString(-1)
- if err := b.NavigateWithAbortFlags(tabTitle, tabUrl, isNewTab, targetUrl, abortFlags, timeout); err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- //执行浏览器端js
- state.SetGlobal("browser_executejs", state.NewFunction(func(l *lua.LState) int {
- needSleep := l.ToBool(-6)
- tabTitle := l.ToString(-5)
- tabUrl := l.ToString(-4)
- timeout := l.ToInt64(-3)
- returnType := l.ToInt(-2)
- script := l.ToString(-1)
- switch returnType {
- case execute_return_type_string: //返回string
- var ret string
- if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout, needSleep); err == nil {
- l.Push(lua.LString("ok"))
- l.Push(lua.LString(ret))
- } else {
- l.Push(lua.LString("err"))
- l.Push(lua.LString(err.Error()))
- }
- case execute_return_type_list: //返回list
- var ret = make([]interface{}, 0, 0)
- var tmp = make(map[string]interface{})
- if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout, needSleep); err == nil {
- for i, v := range ret {
- tmp[strconv.Itoa(i)] = v
- }
- l.Push(lua.LString("ok"))
- l.Push(sp_util.MapToTable(tmp))
- } else {
- l.Push(lua.LString("err"))
- l.Push(lua.LString(err.Error()))
- }
- case execute_return_type_table: //返回table
- var ret = make(map[string]interface{})
- if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout, needSleep); err == nil {
- // log.Println("ret:", ret)
- l.Push(lua.LString("ok"))
- l.Push(sp_util.MapToTable(ret))
- } else {
- log.Println(tabTitle, tabUrl, "EEEEERRRR", err.Error())
- l.Push(lua.LString("err"))
- l.Push(lua.LString(err.Error()))
- }
- }
- return 2
- }))
- //按键
- state.SetGlobal("browser_keysend", state.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-6)
- tabUrl := l.ToString(-5)
- timeout := l.ToInt64(-4)
- selectorType := l.ToInt(-3)
- selector := l.ToString(-2)
- words := l.ToString(-1)
- err := b.KeySend(tabTitle, tabUrl, selector, words, selectorType, timeout)
- if err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- //点击
- state.SetGlobal("browser_click", state.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-5)
- tabUrl := l.ToString(-4)
- timeout := l.ToInt64(-3)
- selectorType := l.ToInt(-2)
- selector := l.ToString(-1)
- err := b.Click(tabTitle, tabUrl, selector, selectorType, timeout)
- if err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- //browser_history_back
- state.SetGlobal("browser_history_back", state.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-3)
- tabUrl := l.ToString(-2)
- timeout := l.ToInt64(-1)
- err := b.GoHistoryBack(tabTitle, tabUrl, timeout)
- if err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- //state.SetGlobal("browser_wait_visible", state.NewFunction(func(l *lua.LState) int {
- state.SetGlobal("browser_wait_ready", state.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-5)
- tabUrl := l.ToString(-4)
- timeout := l.ToInt64(-3)
- selectorType := l.ToInt(-2)
- selector := l.ToString(-1)
- err := b.WaitVisible(tabTitle, tabUrl, selector, selectorType, timeout)
- if err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- //点击
- state.SetGlobal("browser_downloadfile", state.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-7)
- tabUrl := l.ToString(-6)
- timeout := l.ToInt64(-5)
- selectorType := l.ToInt(-4)
- selector := l.ToString(-3)
- filename := l.ToString(-2)
- save2dir := l.ToString(-1)
- err := b.DownloadFile(tabTitle, tabUrl, timeout, selector, selectorType, filename, save2dir)
- if err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- //关闭tabl页
- state.SetGlobal("browser_closetabs_without", state.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-3)
- tabUrl := l.ToString(-2)
- timeout := l.ToInt64(-1)
- if timeout == 0 {
- timeout = 5
- }
- b.CloseTabsWithout(tabTitle, tabUrl, timeout)
- return 0
- }))
- //browser_screenshot 网页局部截图
- state.SetGlobal("browser_screenshot", state.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-6)
- tabUrl := l.ToString(-5)
- timeout := l.ToInt64(-4)
- selectorType := l.ToInt(-3)
- selector := l.ToString(-2)
- filename := l.ToString(-1)
- if timeout == 0 {
- timeout = 5
- }
- if err := b.Screenshot(tabTitle, tabUrl, timeout, selectorType, selector, filename); err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- //browser_print2pdf 整个网页生成pdf
- state.SetGlobal("browser_print2pdf", state.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-4)
- tabUrl := l.ToString(-3)
- timeout := l.ToInt64(-2)
- filename := l.ToString(-1)
- if timeout == 0 {
- timeout = 5
- }
- if err := b.PrintToPDF(tabTitle, tabUrl, timeout, filename); err != nil {
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- }
- return 1
- }))
- state.SetGlobal("browser_tabs", state.NewFunction(func(l *lua.LState) int {
- tabTitle := l.ToString(-3)
- tabUrl := l.ToString(-2)
- timeout := l.ToInt64(-1)
- if timeout == 0 {
- timeout = 500
- }
- var tmp = make(map[string]interface{})
- tabs, err := b.GetBrowserTabs(tabTitle, tabUrl, timeout)
- if err == nil {
- for i, v := range tabs {
- tmp[strconv.Itoa(i)] = v
- }
- l.Push(lua.LString("ok"))
- l.Push(sp_util.MapToTable(tmp))
- } else {
- l.Push(lua.LString("err"))
- l.Push(lua.LString(err.Error()))
- }
- return 2
- }))
- state.SetGlobal("browser_send_img_chatbot", state.NewFunction(func(l *lua.LState) int {
- mentioned := l.ToString(-3)
- uri := l.ToString(-2)
- img := l.ToString(-1)
- err := SendImage2ChatBot(uri, img, mentioned)
- if err != nil {
- l.Push(lua.LString("err"))
- l.Push(lua.LString(err.Error()))
- } else {
- l.Push(lua.LString("ok"))
- l.Push(lua.LString("ok"))
- }
- return 2
- }))
- // 编辑器原有方法
- state.SetGlobal("findContentText", state.NewFunction(func(S *lua.LState) int {
- gpath := S.ToString(-2)
- content := S.ToString(-1)
- ret := util.FindContentText(gpath, content)
- S.Push(ret)
- return 1
- }))
- state.SetGlobal("findOneText", state.NewFunction(func(S *lua.LState) int {
- nodetype := S.ToString(-3)
- gpath := S.ToString(-2)
- content := S.ToString(-1)
- ret := util.FindOneText(gpath, content, nodetype)
- S.Push(ret)
- return 1
- }))
- state.SetGlobal("findOneHtml", state.NewFunction(func(S *lua.LState) int {
- nodetype := S.ToString(-3)
- gpath := S.ToString(-2)
- content := S.ToString(-1)
- ret := util.FindOneHtml(gpath, content, nodetype)
- S.Push(ret)
- return 1
- }))
- state.SetGlobal("findListText", state.NewFunction(func(S *lua.LState) int {
- gpath := S.ToString(-2)
- content := S.ToString(-1)
- ret := state.NewTable()
- util.FindListText(gpath, content, ret)
- S.Push(ret)
- return 1
- }))
- state.SetGlobal("findListHtml", state.NewFunction(func(S *lua.LState) int {
- gpath := S.ToString(-2)
- content := S.ToString(-1)
- ret := state.NewTable()
- util.FindListHtml(gpath, content, ret)
- S.Push(ret)
- return 1
- }))
- state.SetGlobal("findMap", state.NewFunction(func(S *lua.LState) int {
- qmap := S.ToTable(-2)
- content := S.ToString(-1)
- ret := state.NewTable()
- util.FindMap(qmap, content, ret)
- S.Push(ret)
- return 1
- }))
- //手工延时
- state.SetGlobal("timeSleep", state.NewFunction(func(S *lua.LState) int {
- log.Println("开始睡眠。")
- timeout := S.ToInt64(-1)
- if timeout == 0 {
- timeout = 5000
- }
- time.Sleep(time.Duration(timeout) * time.Millisecond)
- return 0
- }))
- //支持正则
- state.SetGlobal("regexp", state.NewFunction(func(S *lua.LState) int {
- index := int(S.ToNumber(-1))
- regstr := S.ToString(-2)
- text := S.ToString(-3)
- reg := regexp.MustCompile(regstr)
- reps := reg.FindAllStringSubmatchIndex(text, -1)
- ret := state.NewTable()
- number := 0
- for _, v := range reps {
- number++
- ret.Insert(number, lua.LString(text[v[index]:v[index+1]]))
- }
- S.Push(ret)
- return 1
- }))
- //支持替换
- state.SetGlobal("replace", state.NewFunction(func(S *lua.LState) int {
- text := S.ToString(-3)
- old := S.ToString(-2)
- repl := S.ToString(-1)
- text = strings.Replace(text, old, repl, -1)
- S.Push(lua.LString(text))
- return 1
- }))
- //detail过滤
- state.SetGlobal("filterdetail", state.NewFunction(func(S *lua.LState) int {
- /*
- 1.长度判断 (特殊处理:详情请访问原网页!;详见原网页;见原网页;无;无相关内容;无正文内容)
- 2.是否含汉字
- */
- reg1 := regexp.MustCompile("(原网页|无|无相关内容|无正文内容|详见附件|见附件)")
- reg2 := regexp.MustCompile("[\u4e00-\u9fa5]")
- detail := S.ToString(-1)
- if reg1.MatchString(detail) {
- S.Push(lua.LBool(true))
- return 1
- }
- if len([]rune(detail)) < 50 || !reg2.MatchString(detail) {
- S.Push(lua.LBool(false))
- return 1
- }
- S.Push(lua.LBool(false))
- return 1
- }))
- //匹配汉字
- state.SetGlobal("matchan", state.NewFunction(func(S *lua.LState) int {
- reg1 := regexp.MustCompile("(见附件|详见附件)")
- reg2 := regexp.MustCompile("[\u4e00-\u9fa5]")
- detail := S.ToString(-1)
- detail = reg1.ReplaceAllString(detail, "")
- ok := reg2.MatchString(detail)
- S.Push(lua.LBool(ok))
- return 1
- }))
- //匹配
- state.SetGlobal("stringFind", state.NewFunction(func(S *lua.LState) int {
- regstr := S.ToString(-1)
- text := S.ToString(-2)
- reg := regexp.MustCompile(regstr)
- result := reg.FindString(text)
- isMatch := false
- if result != "" {
- isMatch = true
- }
- S.Push(lua.LString(result))
- S.Push(lua.LBool(isMatch))
- return 2
- }))
- //截取
- state.SetGlobal("stringSub", state.NewFunction(func(S *lua.LState) int {
- text := S.ToString(-3)
- start := S.ToInt(-2)
- end := S.ToInt(-1)
- result := ""
- if len(text) > 0 {
- textRune := []rune(text)
- textLen := len(textRune)
- if end < 0 {
- if start > 0 { //正向截取到倒数第end位
- result = string(textRune[start-1 : textLen+1+end])
- } else if start < 0 { //反向截取 从倒数第start位截取到倒数第end位
- result = string(textRune[textLen+start : textLen+1+end])
- }
- } else if start > 0 && end >= start && end <= textLen { //从第start个截取到第end个
- result = string(textRune[start-1 : end])
- }
- // if end == -1 {
- // if start >= 1 { //正向截取到结尾
- // result = string(textRune[start-1:])
- // } else if start < 0 && textLen+start >= 0 { //反向截取后缀
- // result = string(textRune[textLen+start:])
- // }
- // } else if start >= 1 && end <= textLen { //从第start个截取到第end个
- // result = string(textRune[start-1 : end])
- // }
- }
- S.Push(lua.LString(result))
- return 1
- }))
- //长度
- state.SetGlobal("stringLen", state.NewFunction(func(S *lua.LState) int {
- text := S.ToString(-1)
- textLen := len([]rune(text))
- S.Push(lua.LNumber(textLen))
- return 1
- }))
- //去除特殊标签中间内容
- state.SetGlobal("getPureContent", state.NewFunction(func(S *lua.LState) int {
- con := S.ToString(-1)
- reg := regexp.MustCompile("(?s)<(!%-%-|!--|style).*?(%-%-|--|style)>") //注释 css
- con = reg.ReplaceAllString(con, "")
- // indexArr := reg.FindAllStringIndex(con, -1)
- // for i := len(indexArr) - 1; i >= 0; i-- {
- // if index := indexArr[i]; len(index) == 2 {
- // con = con[:index[0]] + con[index[1]:]
- // }
- // }
- S.Push(lua.LString(con))
- return 1
- }))
- }
|