/** * 浏览器行为封装 * 基础动作 */ package browser import ( "context" "errors" "log" // "log" sp_util "KeyWebsiteMonitor/spider/util" util "KeyWebsiteMonitor/util" "regexp" "strconv" "strings" "time" "github.com/chromedp/chromedp" "github.com/yuin/gopher-lua" ) const ( selector_type_id = 0 selector_type_query = 1 selector_type_search = 2 selector_type_jspath = 3 selector_type_query_all = 4 execute_return_type_string = 0 execute_return_type_list = 1 execute_return_type_table = 2 ) // findTab 根据标题、url找tab func (b *Browser) findTabContext(tabTitle, tabUrl string, timeoutInt64 int64) (ctx context.Context, cancelFn context.CancelFunc, err error) { if timeoutInt64 == 0 { timeoutInt64 = 5000 } timeout := time.Duration(timeoutInt64) * time.Millisecond if tabTitle == "*" && tabUrl == "*" { return b.Ctx, b.CancelFn, nil } else if tabTitle == "" && tabUrl == "" { ctx, cancelFn = context.WithTimeout(b.Ctx, timeout) return ctx, cancelFn, nil } else { ts, err := chromedp.Targets(b.Ctx) if err != nil { return nil, nil, err } for _, t := range ts { if (tabTitle != "" && strings.Contains(t.Title, tabTitle)) || (tabUrl != "" && strings.Contains(t.URL, tabUrl)) { // log.Printf("find tab param: %s %s found %s %s", tabTitle, tabUrl, // t.Title, t.URL) newCtx, _ := chromedp.NewContext(b.Ctx, chromedp.WithTargetID(t.TargetID)) ctx, cancelFn = context.WithTimeout(newCtx, timeout) return ctx, cancelFn, nil } } } return nil, nil, errors.New("can't find tab") } // BindLuaState func (b *Browser) BindLuaState(state *lua.LState) { //执行暂停 state.SetGlobal("browser_sleep", state.NewFunction(func(l *lua.LState) int { timeout := l.ToInt64(-1) if timeout == 0 { timeout = 1000 } time.Sleep(time.Duration(timeout) * time.Millisecond) return 0 })) //关闭tabl页 state.SetGlobal("browser_closetabs", state.NewFunction(func(l *lua.LState) int { tabTitle := l.ToString(-3) tabUrl := l.ToString(-2) timeout := l.ToInt64(-1) if timeout == 0 { timeout = 5 } b.CloseTabs(tabTitle, tabUrl, timeout) return 0 })) //注册打开地址 state.SetGlobal("browser_navagite", state.NewFunction(func(l *lua.LState) int { tabTitle := l.ToString(-5) tabUrl := l.ToString(-4) isNewTab := l.ToBool(-3) timeout := l.ToInt64(-2) targetUrl := l.ToString(-1) if err := b.Navigate(tabTitle, tabUrl, isNewTab, targetUrl, timeout); err != nil { l.Push(lua.LString(err.Error())) } else { l.Push(lua.LString("ok")) } return 1 })) //注册打开地址 state.SetGlobal("browser_navagite_with_abortflag", state.NewFunction(func(l *lua.LState) int { tabTitle := l.ToString(-6) tabUrl := l.ToString(-5) isNewTab := l.ToBool(-4) timeout := l.ToInt64(-3) targetUrl := l.ToString(-2) abortFlags := l.ToString(-1) if err := b.NavigateWithAbortFlags(tabTitle, tabUrl, isNewTab, targetUrl, abortFlags, timeout); err != nil { l.Push(lua.LString(err.Error())) } else { l.Push(lua.LString("ok")) } return 1 })) //执行浏览器端js state.SetGlobal("browser_executejs", state.NewFunction(func(l *lua.LState) int { needSleep := l.ToBool(-6) tabTitle := l.ToString(-5) tabUrl := l.ToString(-4) timeout := l.ToInt64(-3) returnType := l.ToInt(-2) script := l.ToString(-1) switch returnType { case execute_return_type_string: //返回string var ret string if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout, needSleep); err == nil { l.Push(lua.LString("ok")) l.Push(lua.LString(ret)) } else { l.Push(lua.LString("err")) l.Push(lua.LString(err.Error())) } case execute_return_type_list: //返回list var ret = make([]interface{}, 0, 0) var tmp = make(map[string]interface{}) if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout, needSleep); err == nil { for i, v := range ret { tmp[strconv.Itoa(i)] = v } l.Push(lua.LString("ok")) l.Push(sp_util.MapToTable(tmp)) } else { l.Push(lua.LString("err")) l.Push(lua.LString(err.Error())) } case execute_return_type_table: //返回table var ret = make(map[string]interface{}) if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout, needSleep); err == nil { // log.Println("ret:", ret) l.Push(lua.LString("ok")) l.Push(sp_util.MapToTable(ret)) } else { log.Println(tabTitle, tabUrl, "EEEEERRRR", err.Error()) l.Push(lua.LString("err")) l.Push(lua.LString(err.Error())) } } return 2 })) //按键 state.SetGlobal("browser_keysend", state.NewFunction(func(l *lua.LState) int { tabTitle := l.ToString(-6) tabUrl := l.ToString(-5) timeout := l.ToInt64(-4) selectorType := l.ToInt(-3) selector := l.ToString(-2) words := l.ToString(-1) err := b.KeySend(tabTitle, tabUrl, selector, words, selectorType, timeout) if err != nil { l.Push(lua.LString(err.Error())) } else { l.Push(lua.LString("ok")) } return 1 })) //点击 state.SetGlobal("browser_click", state.NewFunction(func(l *lua.LState) int { tabTitle := l.ToString(-5) tabUrl := l.ToString(-4) timeout := l.ToInt64(-3) selectorType := l.ToInt(-2) selector := l.ToString(-1) err := b.Click(tabTitle, tabUrl, selector, selectorType, timeout) if err != nil { l.Push(lua.LString(err.Error())) } else { l.Push(lua.LString("ok")) } return 1 })) //browser_history_back state.SetGlobal("browser_history_back", state.NewFunction(func(l *lua.LState) int { tabTitle := l.ToString(-3) tabUrl := l.ToString(-2) timeout := l.ToInt64(-1) err := b.GoHistoryBack(tabTitle, tabUrl, timeout) if err != nil { l.Push(lua.LString(err.Error())) } else { l.Push(lua.LString("ok")) } return 1 })) //state.SetGlobal("browser_wait_visible", state.NewFunction(func(l *lua.LState) int { state.SetGlobal("browser_wait_ready", state.NewFunction(func(l *lua.LState) int { tabTitle := l.ToString(-5) tabUrl := l.ToString(-4) timeout := l.ToInt64(-3) selectorType := l.ToInt(-2) selector := l.ToString(-1) err := b.WaitVisible(tabTitle, tabUrl, selector, selectorType, timeout) if err != nil { l.Push(lua.LString(err.Error())) } else { l.Push(lua.LString("ok")) } return 1 })) //点击 state.SetGlobal("browser_downloadfile", state.NewFunction(func(l *lua.LState) int { tabTitle := l.ToString(-7) tabUrl := l.ToString(-6) timeout := l.ToInt64(-5) selectorType := l.ToInt(-4) selector := l.ToString(-3) filename := l.ToString(-2) save2dir := l.ToString(-1) err := b.DownloadFile(tabTitle, tabUrl, timeout, selector, selectorType, filename, save2dir) if err != nil { l.Push(lua.LString(err.Error())) } else { l.Push(lua.LString("ok")) } return 1 })) //关闭tabl页 state.SetGlobal("browser_closetabs_without", state.NewFunction(func(l *lua.LState) int { tabTitle := l.ToString(-3) tabUrl := l.ToString(-2) timeout := l.ToInt64(-1) if timeout == 0 { timeout = 5 } b.CloseTabsWithout(tabTitle, tabUrl, timeout) return 0 })) //browser_screenshot 网页局部截图 state.SetGlobal("browser_screenshot", state.NewFunction(func(l *lua.LState) int { tabTitle := l.ToString(-6) tabUrl := l.ToString(-5) timeout := l.ToInt64(-4) selectorType := l.ToInt(-3) selector := l.ToString(-2) filename := l.ToString(-1) if timeout == 0 { timeout = 5 } if err := b.Screenshot(tabTitle, tabUrl, timeout, selectorType, selector, filename); err != nil { l.Push(lua.LString(err.Error())) } else { l.Push(lua.LString("ok")) } return 1 })) //browser_print2pdf 整个网页生成pdf state.SetGlobal("browser_print2pdf", state.NewFunction(func(l *lua.LState) int { tabTitle := l.ToString(-4) tabUrl := l.ToString(-3) timeout := l.ToInt64(-2) filename := l.ToString(-1) if timeout == 0 { timeout = 5 } if err := b.PrintToPDF(tabTitle, tabUrl, timeout, filename); err != nil { l.Push(lua.LString(err.Error())) } else { l.Push(lua.LString("ok")) } return 1 })) state.SetGlobal("browser_tabs", state.NewFunction(func(l *lua.LState) int { tabTitle := l.ToString(-3) tabUrl := l.ToString(-2) timeout := l.ToInt64(-1) if timeout == 0 { timeout = 500 } var tmp = make(map[string]interface{}) tabs, err := b.GetBrowserTabs(tabTitle, tabUrl, timeout) if err == nil { for i, v := range tabs { tmp[strconv.Itoa(i)] = v } l.Push(lua.LString("ok")) l.Push(sp_util.MapToTable(tmp)) } else { l.Push(lua.LString("err")) l.Push(lua.LString(err.Error())) } return 2 })) state.SetGlobal("browser_send_img_chatbot", state.NewFunction(func(l *lua.LState) int { mentioned := l.ToString(-3) uri := l.ToString(-2) img := l.ToString(-1) err := SendImage2ChatBot(uri, img, mentioned) if err != nil { l.Push(lua.LString("err")) l.Push(lua.LString(err.Error())) } else { l.Push(lua.LString("ok")) l.Push(lua.LString("ok")) } return 2 })) // 编辑器原有方法 state.SetGlobal("findContentText", state.NewFunction(func(S *lua.LState) int { gpath := S.ToString(-2) content := S.ToString(-1) ret := util.FindContentText(gpath, content) S.Push(ret) return 1 })) state.SetGlobal("findOneText", state.NewFunction(func(S *lua.LState) int { nodetype := S.ToString(-3) gpath := S.ToString(-2) content := S.ToString(-1) ret := util.FindOneText(gpath, content, nodetype) S.Push(ret) return 1 })) state.SetGlobal("findOneHtml", state.NewFunction(func(S *lua.LState) int { nodetype := S.ToString(-3) gpath := S.ToString(-2) content := S.ToString(-1) ret := util.FindOneHtml(gpath, content, nodetype) S.Push(ret) return 1 })) state.SetGlobal("findListText", state.NewFunction(func(S *lua.LState) int { gpath := S.ToString(-2) content := S.ToString(-1) ret := state.NewTable() util.FindListText(gpath, content, ret) S.Push(ret) return 1 })) state.SetGlobal("findListHtml", state.NewFunction(func(S *lua.LState) int { gpath := S.ToString(-2) content := S.ToString(-1) ret := state.NewTable() util.FindListHtml(gpath, content, ret) S.Push(ret) return 1 })) state.SetGlobal("findMap", state.NewFunction(func(S *lua.LState) int { qmap := S.ToTable(-2) content := S.ToString(-1) ret := state.NewTable() util.FindMap(qmap, content, ret) S.Push(ret) return 1 })) //手工延时 state.SetGlobal("timeSleep", state.NewFunction(func(S *lua.LState) int { log.Println("开始睡眠。") timeout := S.ToInt64(-1) if timeout == 0 { timeout = 5000 } time.Sleep(time.Duration(timeout) * time.Millisecond) return 0 })) //支持正则 state.SetGlobal("regexp", state.NewFunction(func(S *lua.LState) int { index := int(S.ToNumber(-1)) regstr := S.ToString(-2) text := S.ToString(-3) reg := regexp.MustCompile(regstr) reps := reg.FindAllStringSubmatchIndex(text, -1) ret := state.NewTable() number := 0 for _, v := range reps { number++ ret.Insert(number, lua.LString(text[v[index]:v[index+1]])) } S.Push(ret) return 1 })) //支持替换 state.SetGlobal("replace", state.NewFunction(func(S *lua.LState) int { text := S.ToString(-3) old := S.ToString(-2) repl := S.ToString(-1) text = strings.Replace(text, old, repl, -1) S.Push(lua.LString(text)) return 1 })) //detail过滤 state.SetGlobal("filterdetail", state.NewFunction(func(S *lua.LState) int { /* 1.长度判断 (特殊处理:详情请访问原网页!;详见原网页;见原网页;无;无相关内容;无正文内容) 2.是否含汉字 */ reg1 := regexp.MustCompile("(原网页|无|无相关内容|无正文内容|详见附件|见附件)") reg2 := regexp.MustCompile("[\u4e00-\u9fa5]") detail := S.ToString(-1) if reg1.MatchString(detail) { S.Push(lua.LBool(true)) return 1 } if len([]rune(detail)) < 50 || !reg2.MatchString(detail) { S.Push(lua.LBool(false)) return 1 } S.Push(lua.LBool(false)) return 1 })) //匹配汉字 state.SetGlobal("matchan", state.NewFunction(func(S *lua.LState) int { reg1 := regexp.MustCompile("(见附件|详见附件)") reg2 := regexp.MustCompile("[\u4e00-\u9fa5]") detail := S.ToString(-1) detail = reg1.ReplaceAllString(detail, "") ok := reg2.MatchString(detail) S.Push(lua.LBool(ok)) return 1 })) //匹配 state.SetGlobal("stringFind", state.NewFunction(func(S *lua.LState) int { regstr := S.ToString(-1) text := S.ToString(-2) reg := regexp.MustCompile(regstr) result := reg.FindString(text) isMatch := false if result != "" { isMatch = true } S.Push(lua.LString(result)) S.Push(lua.LBool(isMatch)) return 2 })) //截取 state.SetGlobal("stringSub", state.NewFunction(func(S *lua.LState) int { text := S.ToString(-3) start := S.ToInt(-2) end := S.ToInt(-1) result := "" if len(text) > 0 { textRune := []rune(text) textLen := len(textRune) if end < 0 { if start > 0 { //正向截取到倒数第end位 result = string(textRune[start-1 : textLen+1+end]) } else if start < 0 { //反向截取 从倒数第start位截取到倒数第end位 result = string(textRune[textLen+start : textLen+1+end]) } } else if start > 0 && end >= start && end <= textLen { //从第start个截取到第end个 result = string(textRune[start-1 : end]) } // if end == -1 { // if start >= 1 { //正向截取到结尾 // result = string(textRune[start-1:]) // } else if start < 0 && textLen+start >= 0 { //反向截取后缀 // result = string(textRune[textLen+start:]) // } // } else if start >= 1 && end <= textLen { //从第start个截取到第end个 // result = string(textRune[start-1 : end]) // } } S.Push(lua.LString(result)) return 1 })) //长度 state.SetGlobal("stringLen", state.NewFunction(func(S *lua.LState) int { text := S.ToString(-1) textLen := len([]rune(text)) S.Push(lua.LNumber(textLen)) return 1 })) //去除特殊标签中间内容 state.SetGlobal("getPureContent", state.NewFunction(func(S *lua.LState) int { con := S.ToString(-1) reg := regexp.MustCompile("(?s)<(!%-%-|!--|style).*?(%-%-|--|style)>") //注释 css con = reg.ReplaceAllString(con, "") // indexArr := reg.FindAllStringIndex(con, -1) // for i := len(indexArr) - 1; i >= 0; i-- { // if index := indexArr[i]; len(index) == 2 { // con = con[:index[0]] + con[index[1]:] // } // } S.Push(lua.LString(con)) return 1 })) }