package script
import (
"context"
"errors"
"fmt"
"github.com/yuin/gopher-lua/parse"
qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
"net/url"
"os"
"path/filepath"
"spider_creator/backend"
"strconv"
"strings"
"time"
"github.com/chromedp/cdproto/browser"
"github.com/chromedp/cdproto/network"
"github.com/chromedp/cdproto/page"
"github.com/chromedp/chromedp"
"github.com/yuin/gopher-lua"
be "spider_creator/backend"
)
// Selector kinds understood by the browser helpers (Click, KeySend,
// WaitVisible, DownloadFile). The numeric values are supplied by Lua scripts
// through the browser_* bindings, so they must stay stable.
const (
	selector_type_id        = iota // 0: match by element id
	selector_type_query            // 1: match by query
	selector_type_search           // 2: match by search
	selector_type_jspath           // 3: match by JS path
	selector_type_query_all        // 4: match by query-all (also the fallback)
)

// Result shapes that browser_executejs can hand back to Lua.
const (
	execute_return_type_string = iota // 0: a single string
	execute_return_type_list          // 1: a list, returned as an index-keyed table
	execute_return_type_table         // 2: a table (object)
)

// Script file names appended to GLVm.ScriptDir by LoadScript.
const (
	qlm_list_lua   = "/qlm_list.lua"
	qlm_detail_lua = "/qlm_detail.lua"
)
// DataCache receives every row handed over by the browser_savedata Lua
// binding. It is buffered with room for 2000 rows.
var DataCache = make(chan map[string]interface{}, 2000)

// Datas is the pool of rows that the browser_getdata Lua binding serves from;
// rows are taken from the front of the slice.
var Datas []map[string]interface{}
// GLVm bundles the settings and runtime state used to run Lua scripts that
// drive a browser.
type GLVm struct {
// ScriptDir is the directory LoadScript reads the Lua script files from.
ScriptDir string
// LogsDir is the directory in which RunScript creates its per-day log file.
LogsDir string
// LogsFile is the log file opened by RunScript; browser_savelog writes to it.
LogsFile *os.File
// Dnf is the event notifier supplied to NewGLVM.
Dnf backend.EventNotifyFace
// Headless, ShowImage and ProxyServer are forwarded to backend.NewBrowser
// by ResetBrowser.
Headless bool
ShowImage bool
ProxyServer bool
// ProxyAddr is not referenced by the code visible in this part of the file.
// NOTE(review): presumably the proxy address to use when ProxyServer is set — confirm.
ProxyAddr string
// B is the current browser handle; ResetBrowser creates or refreshes it.
B *GLBrowser
ScriptRunning bool // flag meant to allow only one script to run at a time
// DataSaveOver is created by NewGLVM with a buffer of one.
// NOTE(review): presumably signals that saving data has finished — confirm.
DataSaveOver chan bool
}
// GLBrowser wraps the context of a running browser together with the function
// that cancels it. Both fields are set to nil once the browser is released.
type GLBrowser struct {
// Ctx is the browser context that tab lookups and actions are derived from.
Ctx context.Context
// CancelFn cancels Ctx.
CancelFn context.CancelFunc
}
// NewGLVM builds a GLVm for the given script and log directories.
//
// dnf is stored as the event notifier. DataSaveOver is created with a buffer
// of one; every other field keeps its zero value.
func NewGLVM(scriptDir, logsDir string, dnf be.EventNotifyFace) *GLVm {
	vm := new(GLVm)
	vm.ScriptDir = scriptDir
	vm.LogsDir = logsDir
	vm.Dnf = dnf
	vm.DataSaveOver = make(chan bool, 1)
	return vm
}
// LoadScript reads the Lua script for the given page kind from ScriptDir.
//
// page selects the file: "list" loads qlm_list.lua and "detail" loads
// qlm_detail.lua. For any other value, or when the file cannot be read, the
// problem is logged and an empty string is returned.
func (glvm *GLVm) LoadScript(page string) string {
	var name string
	switch page {
	case "list":
		name = qlm_list_lua
	case "detail":
		name = qlm_detail_lua
	default:
		// Nothing to load; avoid calling os.ReadFile with an empty path.
		qu.Debug("unsupported script page:", page)
		return ""
	}
	path := filepath.Join(glvm.ScriptDir, name)
	bs, err := os.ReadFile(path)
	if err != nil {
		qu.Debug(path, "脚本加载失败...", err)
		return ""
	}
	return string(bs)
}
// RunScript compiles and runs a Lua script against a freshly reset browser.
//
// script is the Lua source text; recordId is handed to the browser bindings
// (browser_savedata attaches it to "list" rows).
//
// The steps are:
//  1. open (or create) the per-day log file under LogsDir,
//  2. parse and compile the script — a syntax error returns before any
//     browser is started,
//  3. reset the browser and register the Lua bindings,
//  4. run the script in protected mode.
//
// It returns the error from opening the log file, from parsing/compiling, or
// the error raised while the script was running. On return the Lua state and
// log file are closed and the browser context is cancelled.
func (glvm *GLVm) RunScript(script, recordId string) error {
	defer qu.Catch()
	s := lua.NewState()
	defer s.Close()

	// Log file.
	now := time.Now()
	path := glvm.LogsDir + fmt.Sprintf("/%s.log", qu.FormatDate(&now, qu.Date_Short_Layout))
	qu.Debug("log path:", path)
	file, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0666)
	if err != nil {
		qu.Debug("日志创建失败:", err)
		return err
	}
	glvm.LogsFile = file
	defer file.Close()

	// Compile first so a broken script never costs a browser launch. The
	// chunk name is a short label; it is what shows up in Lua error messages.
	chunk, err := parse.Parse(strings.NewReader(script), "code")
	if err != nil {
		return err
	}
	proto, err := lua.Compile(chunk, "code")
	if err != nil {
		return err
	}

	// Create the browser before binding: the bindings capture glvm.B.
	glvm.ResetBrowser()
	defer func() {
		if b := glvm.B; b != nil {
			if b.CancelFn != nil {
				b.CancelFn()
			}
			b.Ctx = nil
			b.CancelFn = nil
		}
	}()
	glvm.BindLuaState(s)
	glvm.B.BindLuaState(s, recordId)

	// PCall reports script errors as a return value instead of panicking, so
	// the caller actually sees them.
	s.Push(s.NewFunctionFromProto(proto))
	return s.PCall(0, 0, nil)
}
// ResetBrowser cancels the current browser context, if any, and starts a new
// browser using the Headless, ShowImage and ProxyServer settings.
//
// When glvm.B already exists its fields are updated in place, so anything
// holding that pointer sees the new context.
//
// NOTE(review): the last argument to backend.NewBrowser is the literal
// "http://" and the first four results are discarded; confirm this is
// intended (ProxyAddr is not used here).
func (glvm *GLVm) ResetBrowser() {
	if old := glvm.B; old != nil && old.CancelFn != nil {
		old.CancelFn()
		old.Ctx, old.CancelFn = nil, nil
	}
	_, _, _, _, ctx, cancel := backend.NewBrowser(glvm.Headless, glvm.ShowImage, glvm.ProxyServer, "http://")
	if glvm.B == nil {
		glvm.B = &GLBrowser{Ctx: ctx, CancelFn: cancel}
		return
	}
	glvm.B.Ctx, glvm.B.CancelFn = ctx, cancel
}
// BindLuaState registers the VM-level Lua globals on s:
//
//   - browser_reset(): restarts the browser via ResetBrowser.
//   - browser_savelog(text): logs text and appends a timestamped line to
//     LogsFile. The write error is ignored.
func (glvm *GLVm) BindLuaState(s *lua.LState) {
	resetFn := func(l *lua.LState) int {
		glvm.ResetBrowser()
		return 0
	}
	saveLogFn := func(l *lua.LState) int {
		text := l.ToString(-1)
		qu.Debug("log:", text)
		now := time.Now()
		line := fmt.Sprintf("%s---%s\n", qu.FormatDate(&now, qu.Date_Full_Layout), text)
		glvm.LogsFile.Write([]byte(line))
		return 0
	}
	s.SetGlobal("browser_reset", s.NewFunction(resetFn))
	s.SetGlobal("browser_savelog", s.NewFunction(saveLogFn))
}
// CloseTabs cancels the browser context and drops the browser handle.
// It does nothing when there is no browser or no cancel function.
func (glvm *GLVm) CloseTabs() {
	br := glvm.B
	if br == nil || br.CancelFn == nil {
		return
	}
	br.CancelFn()
	br.Ctx, br.CancelFn = nil, nil
	glvm.B = nil
}
// findTab 根据标题、url找tab
func (b *GLBrowser) findTabContext(tabTitle, tabUrl string, timeoutInt64 int64) (ctx context.Context, err error) {
if b.Ctx != nil {
if timeoutInt64 == 0 {
timeoutInt64 = 5000
}
timeout := time.Duration(timeoutInt64) * time.Millisecond
if tabTitle == "" && tabUrl == "" {
ctx, _ = context.WithTimeout(b.Ctx, timeout)
return ctx, nil
} else {
ts, err := chromedp.Targets(b.Ctx)
if err != nil {
return nil, err
}
for _, t := range ts {
if (tabTitle != "" && strings.Contains(t.Title, tabTitle)) || (tabUrl != "" && strings.Contains(t.URL, tabUrl)) {
// log.Printf("find tab param
: %s %s found %s %s", tabTitle, tabUrl,
// t.Title, t.URL)
newCtx, _ := chromedp.NewContext(b.Ctx, chromedp.WithTargetID(t.TargetID))
ctx, _ = context.WithTimeout(newCtx, timeout)
return ctx, nil
}
}
}
return nil, errors.New("can't find tab")
}
return nil, errors.New("context is error")
}
// CloseTabs closes every tab whose title contains tabTitle or whose URL
// contains tabUrl. An empty filter never matches, so passing two empty
// strings closes nothing.
//
// timeoutInt64 is the per-tab timeout in milliseconds; zero means 5000, the
// same default findTabContext uses.
//
// It returns "context is error" when the browser has no context, or the error
// from listing the targets. A failure to close an individual tab is logged
// and does not stop the remaining tabs from being closed.
func (b *GLBrowser) CloseTabs(tabTitle, tabUrl string, timeoutInt64 int64) (err error) {
	if b.Ctx == nil {
		return errors.New("context is error")
	}
	if timeoutInt64 == 0 {
		timeoutInt64 = 5000
	}
	timeout := time.Duration(timeoutInt64) * time.Millisecond
	ts, err := chromedp.Targets(b.Ctx)
	if err != nil {
		return err
	}
	for _, t := range ts {
		titleHit := tabTitle != "" && strings.Contains(t.Title, tabTitle)
		urlHit := tabUrl != "" && strings.Contains(t.URL, tabUrl)
		if !titleHit && !urlHit {
			continue
		}
		// The tab context's own cancel func is left alone: the tab is closed
		// explicitly with page.Close below.
		tabCtx, _ := chromedp.NewContext(b.Ctx, chromedp.WithTargetID(t.TargetID))
		runCtx, cancel := context.WithTimeout(tabCtx, timeout)
		closeErr := chromedp.Run(runCtx, page.Close())
		cancel() // release the timer right away instead of waiting for it to fire
		if closeErr != nil {
			qu.Debug("close tab failed:", t.URL, closeErr)
		}
	}
	return nil
}
// Navigate opens targetUrl in the tab selected by tabTitle/tabUrl, or in a new
// tab when isNewTab is true. timeout is in milliseconds and is applied by
// findTabContext.
//
// It returns the tab lookup error or the error from the navigation.
//
// NOTE(review): a new tab's context is derived from the timeout-limited
// lookup context — confirm the tab is meant to share that lifetime.
func (b *GLBrowser) Navigate(tabTitle string, tabUrl string, isNewTab bool, targetUrl string, timeout int64) (err error) {
	tabCtx, err := b.findTabContext(tabTitle, tabUrl, timeout)
	if err != nil {
		return err
	}
	runCtx := tabCtx
	if isNewTab {
		// New tab.
		runCtx, _ = chromedp.NewContext(tabCtx)
	}
	return chromedp.Run(runCtx, chromedp.Navigate(targetUrl))
}
// NavigateAndSaveRes opens targetUrl and saves the resources requested while
// loading it (images and so on) into save2dir.
//
// Parameters:
//   - tabTitle, tabUrl, timeout: select the tab, see findTabContext.
//   - isNewTab: open the page in a new tab.
//   - saveFileTypeList: space-separated fragments; a response is kept when
//     its Content-Type contains any of them.
//   - save2dir: destination directory; the file name is the last element of
//     the request URL's path.
//
// Response bodies are fetched in separate goroutines because the listener is
// invoked synchronously by chromedp and must not run actions itself. As a
// result files may still be written shortly after this function returns;
// fetches are bounded by the tab context's timeout.
//
// It returns the tab lookup error or the navigation error. Failures while
// saving an individual resource are only printed.
func (b *GLBrowser) NavigateAndSaveRes(tabTitle string, tabUrl string, timeout int64, isNewTab bool, targetUrl string, saveFileTypeList, save2dir string) (err error) {
	ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
	if err != nil {
		return err
	}
	// New tab.
	if isNewTab {
		ctx, _ = chromedp.NewContext(ctx)
	}

	saveFileType := strings.Split(saveFileTypeList, " ")
	isNeedRes := func(fileType string) bool {
		for _, v := range saveFileType {
			if strings.Contains(fileType, v) {
				return true
			}
		}
		return false
	}
	fnURL2FileName := func(requestURL string) string {
		u, err := url.Parse(requestURL)
		if err != nil {
			return ""
		}
		_, filename := filepath.Split(u.Path)
		return filename
	}

	// cache maps in-flight request ids to their URL. It is only touched from
	// the listener callback.
	var cache = map[network.RequestID]string{}
	chromedp.ListenTarget(ctx, func(v interface{}) {
		switch ev := v.(type) {
		case *network.EventRequestWillBeSent: // request is about to start
			cache[ev.RequestID] = ev.Request.URL
		case *network.EventResponseReceived: // filter on the response content type
			// Header names are case-insensitive (HTTP/2 sends them lowercase).
			var contentType string
			for k, hv := range ev.Response.Headers {
				if strings.EqualFold(k, "Content-Type") {
					contentType, _ = hv.(string)
					break
				}
			}
			fmt.Println(contentType)
			if !isNeedRes(contentType) {
				delete(cache, ev.RequestID)
			}
		case *network.EventLoadingFinished: // download finished
			uri, ok := cache[ev.RequestID]
			if !ok {
				return
			}
			delete(cache, ev.RequestID)
			filename := fnURL2FileName(uri)
			fmt.Println("save2file", filename)
			if filename == "" {
				return
			}
			filePath := filepath.Join(save2dir, filename)
			requestID := ev.RequestID
			// Running an action inside the listener would block the event
			// loop that has to deliver the action's reply, so do it elsewhere.
			go func() {
				var buf []byte
				if err := chromedp.Run(ctx, chromedp.ActionFunc(func(ctx context.Context) error {
					var err error
					buf, err = network.GetResponseBody(requestID).Do(ctx)
					return err
				})); err != nil {
					fmt.Println(err.Error())
					return
				}
				if err := os.WriteFile(filePath, buf, 0777); err != nil {
					fmt.Println(err.Error())
				}
			}()
		}
	})

	return chromedp.Run(ctx, chromedp.Navigate(targetUrl))
}
// ExecuteJS evaluates script in the tab selected by tabTitle/tabUrl and stores
// the result in ret, which must be a pointer chromedp.Evaluate can fill.
// timeout is in milliseconds and is applied by findTabContext.
//
// It returns the tab lookup error or the evaluation error.
func (b *GLBrowser) ExecuteJS(tabTitle, tabUrl, script string, ret interface{}, timeout int64) (err error) {
	tabCtx, lookupErr := b.findTabContext(tabTitle, tabUrl, timeout)
	if lookupErr != nil {
		return lookupErr
	}
	err = chromedp.Run(tabCtx, chromedp.Evaluate(script, ret))
	return err
}
// Click clicks the element matched by selector in the tab selected by
// tabTitle/tabUrl.
//
// selectorType is one of the selector_type_* constants; any other value is
// treated as query-all. timeout is in milliseconds and is applied by
// findTabContext.
//
// It returns the tab lookup error or the error from the click.
func (b *GLBrowser) Click(tabTitle, tabUrl, selector string, selectorType int, timeout int64) (err error) {
	tabCtx, err := b.findTabContext(tabTitle, tabUrl, timeout)
	if err != nil {
		return err
	}
	var by chromedp.QueryOption
	switch selectorType {
	case selector_type_id:
		by = chromedp.ByID
	case selector_type_query:
		by = chromedp.ByQuery
	case selector_type_search:
		by = chromedp.BySearch
	case selector_type_jspath:
		by = chromedp.ByJSPath
	default:
		by = chromedp.ByQueryAll
	}
	return chromedp.Run(tabCtx, chromedp.Click(selector, by))
}
// KeySend types sendStr into the element matched by selector in the tab
// selected by tabTitle/tabUrl.
//
// selectorType is one of the selector_type_* constants; any other value is
// treated as query-all. timeout is in milliseconds and is applied by
// findTabContext.
//
// It returns the tab lookup error or the error from sending the keys.
func (b *GLBrowser) KeySend(tabTitle, tabUrl, selector, sendStr string, selectorType int, timeout int64) (err error) {
	tabCtx, err := b.findTabContext(tabTitle, tabUrl, timeout)
	if err != nil {
		return err
	}
	var by chromedp.QueryOption
	switch selectorType {
	case selector_type_id:
		by = chromedp.ByID
	case selector_type_query:
		by = chromedp.ByQuery
	case selector_type_search:
		by = chromedp.BySearch
	case selector_type_jspath:
		by = chromedp.ByJSPath
	default:
		by = chromedp.ByQueryAll
	}
	return chromedp.Run(tabCtx, chromedp.SendKeys(selector, sendStr, by))
}
// WaitVisible blocks until the element matched by selector is visible in the
// tab selected by tabTitle/tabUrl, or until the tab context expires.
//
// selectorType is one of the selector_type_* constants; any other value is
// treated as query-all. timeout is in milliseconds and is applied by
// findTabContext.
//
// It returns the tab lookup error or the error from the wait.
func (b *GLBrowser) WaitVisible(tabTitle, tabUrl, selector string, selectorType int, timeout int64) error {
	tabCtx, err := b.findTabContext(tabTitle, tabUrl, timeout)
	if err != nil {
		return err
	}
	var by chromedp.QueryOption
	switch selectorType {
	case selector_type_id:
		by = chromedp.ByID
	case selector_type_query:
		by = chromedp.ByQuery
	case selector_type_search:
		by = chromedp.BySearch
	case selector_type_jspath:
		by = chromedp.ByJSPath
	default:
		by = chromedp.ByQueryAll
	}
	return chromedp.Run(tabCtx, chromedp.WaitVisible(selector, by))
}
// Reset is meant to reset the browser. The body is currently empty, so calling
// it has no effect; GLVm.ResetBrowser performs the actual reset.
func (b *GLBrowser) Reset() {
}
// DownloadFile clicks the element matched by selector after telling the
// browser to allow downloads into save2dir. Apart from the download setting
// it behaves like Click.
//
// Per the original author's note this only works when the browser is not
// headless.
//
// selectorType is one of the selector_type_* constants; any other value is
// treated as query-all. timeout is in milliseconds and is applied by
// findTabContext.
//
// It returns the tab lookup error or the error from running the actions.
func (b *GLBrowser) DownloadFile(tabTitle, tabUrl string, timeout int64, selector string, selectorType int, save2dir string) error {
	tabCtx, err := b.findTabContext(tabTitle, tabUrl, timeout)
	if err != nil {
		return err
	}
	var by chromedp.QueryOption
	switch selectorType {
	case selector_type_id:
		by = chromedp.ByID
	case selector_type_query:
		by = chromedp.ByQuery
	case selector_type_search:
		by = chromedp.BySearch
	case selector_type_jspath:
		by = chromedp.ByJSPath
	default:
		by = chromedp.ByQueryAll
	}
	allowDownload := browser.SetDownloadBehavior(browser.SetDownloadBehaviorBehaviorAllowAndName).
		WithDownloadPath(save2dir).
		WithEventsEnabled(true)
	return chromedp.Run(tabCtx, allowDownload, chromedp.Click(selector, by))
}
// BindLuaState registers the browser_* helper functions as globals on the Lua
// state s. recordId is attached to every "list" row stored through
// browser_savedata.
//
// Arguments are read from the top of the Lua stack, so each function must be
// called with exactly the arguments listed (all timeouts are milliseconds):
//
//	browser_sleep(ms)
//	browser_closetabs(timeout, tabTitle, tabUrl)
//	browser_navagite(tabTitle, tabUrl, isNewTab, timeout, targetUrl) -> status
//	browser_executejs(tabTitle, tabUrl, timeout, returnType, script) -> "ok"|"err", value|message
//	browser_keysend(tabTitle, tabUrl, timeout, words, selectorType, selector) -> status
//	browser_click(tabTitle, tabUrl, timeout, selectorType, selector) -> status
//	browser_waitvisible(tabTitle, tabUrl, timeout, selectorType, selector) -> status
//	browser_downloadfile(tabTitle, tabUrl, timeout, selectorType, selector, save2dir) -> status
//	browser_navagite_download_res(tabTitle, tabUrl, timeout, isNewTab, targetUrl, saveFileTypeList, savedir) -> status
//	browser_publishtime(text) -> formatted time
//	browser_savedata(page, data)
//	browser_getdata(num) -> "ok"|"err", rows|message
//
// status is "ok" on success and the error text otherwise.
func (b *GLBrowser) BindLuaState(s *lua.LState, recordId string) {
	// pushStatus pushes "ok" or the error text and reports one result.
	pushStatus := func(l *lua.LState, err error) int {
		if err != nil {
			l.Push(lua.LString(err.Error()))
		} else {
			l.Push(lua.LString("ok"))
		}
		return 1
	}

	// Pause the script; 0 falls back to 5 ms.
	s.SetGlobal("browser_sleep", s.NewFunction(func(l *lua.LState) int {
		fmt.Println("---browser_sleep---")
		timeout := l.ToInt64(-1)
		if timeout == 0 {
			timeout = 5
		}
		time.Sleep(time.Duration(timeout) * time.Millisecond)
		return 0
	}))

	// Close matching tabs. A timeout of 0 is passed through unchanged;
	// CloseTabs substitutes its own default. The result is ignored.
	s.SetGlobal("browser_closetabs", s.NewFunction(func(l *lua.LState) int {
		fmt.Println("---browser_closetabs---")
		timeout := l.ToInt64(-3)
		tabTitle := l.ToString(-2)
		tabUrl := l.ToString(-1)
		b.CloseTabs(tabTitle, tabUrl, timeout)
		return 0
	}))

	// Open a URL.
	s.SetGlobal("browser_navagite", s.NewFunction(func(l *lua.LState) int {
		fmt.Println("---browser_navagite---")
		tabTitle := l.ToString(-5)  // title of the tab to use
		tabUrl := l.ToString(-4)    // URL of the tab to use
		isNewTab := l.ToBool(-3)    // whether to open a new tab
		timeout := l.ToInt64(-2)    // page load timeout
		targetUrl := l.ToString(-1) // URL to open
		return pushStatus(l, b.Navigate(tabTitle, tabUrl, isNewTab, targetUrl, timeout))
	}))

	// Run JavaScript in the page.
	s.SetGlobal("browser_executejs", s.NewFunction(func(l *lua.LState) int {
		fmt.Println("---browser_executejs---")
		tabTitle := l.ToString(-5)
		tabUrl := l.ToString(-4)
		timeout := l.ToInt64(-3)
		returnType := l.ToInt(-2) // shape of the returned data
		script := l.ToString(-1)  // JavaScript to run

		var (
			value lua.LValue
			err   error
		)
		switch returnType {
		case execute_return_type_string: // string result
			var ret string
			if err = b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout); err == nil {
				value = lua.LString(ret)
			}
		case execute_return_type_list: // list result, keyed by decimal index
			ret := make([]interface{}, 0)
			if err = b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout); err == nil {
				tmp := make(map[string]interface{}, len(ret))
				for i, v := range ret {
					tmp[strconv.Itoa(i)] = v
				}
				value = MapToTable(tmp)
			}
		case execute_return_type_table: // table result
			ret := make(map[string]interface{})
			if err = b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout); err == nil {
				value = MapToTable(ret)
			}
		default:
			// Two results are always reported, so something must be pushed
			// here too; otherwise Lua would receive its own arguments back.
			err = fmt.Errorf("unsupported return type %d", returnType)
		}
		if err != nil {
			l.Push(lua.LString("err"))
			l.Push(lua.LString(err.Error()))
		} else {
			l.Push(lua.LString("ok"))
			l.Push(value)
		}
		return 2
	}))

	// Type into an element.
	s.SetGlobal("browser_keysend", s.NewFunction(func(l *lua.LState) int {
		fmt.Println("---browser_keysend---")
		tabTitle := l.ToString(-6)
		tabUrl := l.ToString(-5)
		timeout := l.ToInt64(-4)
		words := l.ToString(-3)
		selectorType := l.ToInt(-2)
		selector := l.ToString(-1)
		fmt.Println(selector, words, selectorType, timeout)
		return pushStatus(l, b.KeySend(tabTitle, tabUrl, selector, words, selectorType, timeout))
	}))

	// Click an element.
	s.SetGlobal("browser_click", s.NewFunction(func(l *lua.LState) int {
		fmt.Println("---browser_click---")
		tabTitle := l.ToString(-5)
		tabUrl := l.ToString(-4)
		timeout := l.ToInt64(-3)
		selectorType := l.ToInt(-2)
		selector := l.ToString(-1)
		return pushStatus(l, b.Click(tabTitle, tabUrl, selector, selectorType, timeout))
	}))

	// Wait for an element to become visible.
	s.SetGlobal("browser_waitvisible", s.NewFunction(func(l *lua.LState) int {
		fmt.Println("---browser_waitvisible---")
		tabTitle := l.ToString(-5)
		tabUrl := l.ToString(-4)
		timeout := l.ToInt64(-3)
		selectorType := l.ToInt(-2) // selector kind
		selector := l.ToString(-1)  // selector
		return pushStatus(l, b.WaitVisible(tabTitle, tabUrl, selector, selectorType, timeout))
	}))

	// Download an attachment by clicking an element.
	s.SetGlobal("browser_downloadfile", s.NewFunction(func(l *lua.LState) int {
		tabTitle := l.ToString(-6)
		tabUrl := l.ToString(-5)
		timeout := l.ToInt64(-4)
		selectorType := l.ToInt(-3)
		selector := l.ToString(-2)
		save2dir := l.ToString(-1)
		return pushStatus(l, b.DownloadFile(tabTitle, tabUrl, timeout, selector, selectorType, save2dir))
	}))

	// Open a URL and save the resources it loads.
	s.SetGlobal("browser_navagite_download_res", s.NewFunction(func(l *lua.LState) int {
		tabTitle := l.ToString(-7)
		tabUrl := l.ToString(-6)
		timeout := l.ToInt64(-5)
		isNewTab := l.ToBool(-4)
		targetUrl := l.ToString(-3)
		saveFileTypeList := l.ToString(-2)
		savedir := l.ToString(-1)
		return pushStatus(l, b.NavigateAndSaveRes(tabTitle, tabUrl, timeout, isNewTab, targetUrl, saveFileTypeList, savedir))
	}))

	// Format a publish time.
	s.SetGlobal("browser_publishtime", s.NewFunction(func(l *lua.LState) int {
		text := l.ToString(-1)
		publishtime := getPublitime(text)
		l.Push(lua.LString(publishtime))
		return 1
	}))

	// Store a row. The send blocks while DataCache is full.
	s.SetGlobal("browser_savedata", s.NewFunction(func(l *lua.LState) int {
		//fmt.Println("---browser_savedata---")
		page := l.ToString(-2)
		data := l.ToTable(-1)
		result := TableToMap(data)
		if page == "list" {
			result["recordid"] = recordId
		}
		DataCache <- result
		// NOTE(review): nothing is pushed, yet one result is reported, so the
		// caller gets its own data argument back. Kept as is in case scripts
		// rely on it — confirm and change to 0 if not.
		return 1
	}))

	// Hand out up to num rows from the front of Datas.
	s.SetGlobal("browser_getdata", s.NewFunction(func(l *lua.LState) int {
		fmt.Println("---browser_getdata---")
		num := l.ToInt(-1) // how many rows to take
		count := len(Datas)
		if count == 0 {
			l.Push(lua.LString("err"))
			l.Push(lua.LString("当前可下载量为0"))
			return 2
		}
		// Clamp to the valid range; a negative count would panic when slicing.
		if num < 0 {
			num = 0
		}
		if count < num {
			num = count
		}
		data := Datas[:num]
		Datas = Datas[num:]
		tMap := MapToTable(map[string]interface{}{"data": data})
		l.Push(lua.LString("ok"))
		l.Push(tMap.RawGetString("data"))
		return 2
	}))
}