Переглянути джерело

golua+chromedp千里马数据采集

mxs 7 місяців тому
батько
коміт
2862cc0b4e
11 змінених файлів з 825 додано та 28 видалено
  1. 7 0
      app.go
  2. 28 14
      backend/browser.go
  3. 574 0
      backend/script/script.go
  4. 125 0
      backend/script/util.go
  5. 2 2
      backend/vm/check.go
  6. 2 2
      backend/vm/jobs.go
  7. 3 3
      backend/vm/single.go
  8. 2 2
      backend/vm/worker.go
  9. 3 0
      main.go
  10. 18 0
      qianlima.go
  11. 61 5
      server.go

+ 7 - 0
app.go

@@ -7,6 +7,7 @@ import (
 	"os"
 	rt "runtime"
 	bdb "spider_creator/backend/db"
+	"spider_creator/backend/script"
 	bvm "spider_creator/backend/vm"
 	bws "spider_creator/backend/webservice"
 )
@@ -53,12 +54,18 @@ func (a *App) startup(ctx context.Context) {
 	if _, err := os.Stat(attachesDir); err != nil {
 		os.MkdirAll(attachesDir, 0777)
 	}
+	//创建千里马数据目录
+	qlmDir = baseDir + "/qlm"
+	if _, err := os.Stat(qlmDir); err != nil {
+		os.MkdirAll(qlmDir, 0777)
+	}
 	var dbfile = baseDir + "/spider.dat"
 	qu.Debug("db file:", dbfile)
 
 	db = bdb.NewSpiderDb(dbfile, a)
 	bdb.Db = db
 	vm = bvm.NewVM(attachesDir, a)
+	qlmVm = script.NewGLVM(qlmDir, a)
 	ws = bws.NewWebService(db, a, currentSpiderConfig)
 	//
 	go ws.RunHttpServe()

+ 28 - 14
backend/browser.go

@@ -7,6 +7,7 @@ import (
 	"io/ioutil"
 	"math/rand"
 	"net/http"
+	"strings"
 
 	"github.com/chromedp/cdproto/page"
 
@@ -64,29 +65,36 @@ var (
 	}
 )
 
-func NewBrowser(headless bool, showImage bool, proxyServe bool) (
-	context.Context, context.CancelFunc,
-	context.Context, context.CancelFunc,
-	context.Context, context.CancelFunc,
-) {
+func NewBrowser(headless bool, showImage bool, proxyServe bool, baseUrl string) (context.Context, context.CancelFunc, context.Context, context.CancelFunc, context.Context, context.CancelFunc) {
+	ignoreCertificateErrors := false
+	if strings.HasPrefix(baseUrl, "https") {
+		ignoreCertificateErrors = true
+	}
 	ctx, cancelFn := chromedp.NewContext(context.Background())
 
 	chromeOptions := append(chromedp.DefaultExecAllocatorOptions[:],
-		chromedp.NoDefaultBrowserCheck,                                  //不检查默认浏览器
-		chromedp.Flag("enable-automation", false),                       // 防止监测webdriver
-		chromedp.Flag("disable-blink-features", "AutomationControlled"), //禁用 blink 特征
+		chromedp.NoDefaultBrowserCheck,            //不检查默认浏览器
+		chromedp.Flag("enable-automation", false), // 防止监测webdriver
 		chromedp.Flag("force-dev-mode-highlighting", true),
-		chromedp.Flag("disable-extensions", false), //是否禁用扩展
+		//--设置禁止HTTP转HTTPS
+		chromedp.Flag("disable-extensions", true),                       //是否禁用扩展
+		chromedp.Flag("disable-blink-features", "AutomationControlled"), //禁用 blink 特征
+		chromedp.Flag("disable-features", "SSLForcedForSafety"),         // 禁用某些安全特性
+		chromedp.Flag("disable-features", "SSLForced"),
+		chromedp.Flag("disable-features", "AutoupgradeToHTTPS"),
+		chromedp.Flag("disable-features", "ImprovedHTTPSUpgrade"),
+		chromedp.Flag("ssl-protocol", "any"),
+		chromedp.Flag("ignore-certificate-errors-spki-list", true),
+		//--设置禁止HTTP转HTTPS 结束
 		chromedp.Flag("headless", headless),
 		chromedp.Flag("user-agent", useragent[rand.Intn(20)]), //搞到底还是要在这里设置useragent
 		chromedp.Flag("disable-keep-alive", true),
 		chromedp.Flag("disable-gpu", true),
 		chromedp.Flag("no-sandbox", true),
-		chromedp.Flag("disable-dev-shm-usage", false),
-		chromedp.Flag("default-browser-check", false),
-		chromedp.Flag("ignore-certificate-errors", false), //忽略错误 zj_zjsggzyjyzx_zbwjgs_gc
-		chromedp.Flag("disable-web-security", true),       //禁用网络安全标志
-		chromedp.Flag("mute-audio", false),
+		chromedp.Flag("disable-dev-shm-usage", "false"),
+		chromedp.Flag("default-browser-check", "false"),
+		chromedp.Flag("mute-audio", "false"),
+		chromedp.Flag("disable-web-security", true),
 		chromedp.Flag("accept-language", `zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-TW;q=0.6`),
 	)
 	if proxyServe {
@@ -95,6 +103,12 @@ func NewBrowser(headless bool, showImage bool, proxyServe bool) (
 			chromedp.ProxyServer(GetProxyAddr()),
 		)
 	}
+	if ignoreCertificateErrors {
+		chromeOptions = append(chromeOptions,
+			//             ignore-certificate-errors
+			chromedp.Flag("ignore-certificate-errors", true),
+		)
+	}
 	if showImage {
 		chromeOptions = append(chromeOptions,
 			chromedp.Flag("blink-settings", "imagesEnabled=true"),

+ 574 - 0
backend/script/script.go

@@ -0,0 +1,574 @@
+package script
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"github.com/yuin/gopher-lua/parse"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"net/url"
+	"os"
+	"path/filepath"
+	"spider_creator/backend"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/chromedp/cdproto/browser"
+	"github.com/chromedp/cdproto/network"
+	"github.com/chromedp/cdproto/page"
+	"github.com/chromedp/chromedp"
+	"github.com/yuin/gopher-lua"
+	be "spider_creator/backend"
+)
+
+const (
+	selector_type_id        = 0
+	selector_type_query     = 1
+	selector_type_search    = 2
+	selector_type_jspath    = 3
+	selector_type_query_all = 4
+
+	execute_return_type_string = 0
+	execute_return_type_list   = 1
+	execute_return_type_table  = 2
+
+	qlm_list_lua   = "/script/qlm_list.lua"
+	qlm_detail_lua = "/script/qlm_detail.lua"
+)
+
+type GLVm struct {
+	attachesDir string
+	dnf         backend.EventNotifyFace
+	Headless    bool
+	ShowImage   bool
+	ProxyServer bool
+	ProxyAddr   string
+	B           *GLBrowser
+	//WsAddr    string
+	//RunMode   int
+	//S         Storage
+}
+
+type GLBrowser struct {
+	BaseCancelFn context.CancelFunc
+	Ctx          context.Context
+	CancelFn     context.CancelFunc
+}
+
+func NewGLVM(attachesDir string, dnf be.EventNotifyFace) *GLVm {
+	return &GLVm{
+		attachesDir: attachesDir,
+		dnf:         dnf,
+	}
+}
+
+// LoadScript 加载脚本
+func (glvm *GLVm) LoadScript(page string) string {
+	var path string
+	if page == "list" {
+		path = glvm.attachesDir + qlm_list_lua
+	} else if page == "detail" {
+		path = glvm.attachesDir + qlm_detail_lua
+	}
+
+	bs, err := os.ReadFile(path)
+	if err != nil {
+		qu.Debug(path, "脚本加载失败...")
+	}
+	return string(bs)
+}
+
+// RunScript 执行lua代码
+func (glvm *GLVm) RunScript(script string) error {
+	defer Catch()
+	var state *lua.LState = lua.NewState()
+	defer state.Close()
+	//方法绑定
+	glvm.ResetBrowser()      //先创建浏览器对象
+	glvm.BindLuaState(state) //绑定虚拟机函数
+	glvm.B.BindLuaState(state)
+	defer func() {
+		if b := glvm.B; b != nil {
+			b.CancelFn()
+			b.Ctx = nil
+			b.CancelFn = nil
+			b.BaseCancelFn()
+			b.BaseCancelFn = nil
+		}
+	}()
+
+	reader := strings.NewReader(script)
+	chunk, err := parse.Parse(reader, "code")
+	if err != nil {
+		return err
+	}
+	proto, err := lua.Compile(chunk, script)
+	if err != nil {
+		return err
+	}
+	lfunc := state.NewFunctionFromProto(proto)
+	state.Push(lfunc)
+	state.Call(0, 0)
+
+	return nil
+}
+
+// ResetBrowser 重置浏览器
+func (vm *GLVm) ResetBrowser() {
+	if vm.B != nil && vm.B.CancelFn != nil && vm.B.BaseCancelFn != nil {
+		vm.B.CancelFn()
+		vm.B.BaseCancelFn()
+		vm.B.Ctx = nil
+		vm.B.CancelFn = nil
+		vm.B.BaseCancelFn = nil
+	}
+	_, baseCancelFn, _, _, ctx, incCancelFn := backend.NewBrowser(vm.Headless, vm.ShowImage, vm.ProxyServer, "https://")
+	b := &GLBrowser{
+		BaseCancelFn: baseCancelFn,
+		Ctx:          ctx,
+		CancelFn:     incCancelFn,
+	}
+
+	if vm.B == nil {
+		vm.B = b
+	} else {
+		vm.B.Ctx, vm.B.CancelFn = b.Ctx, b.CancelFn
+	}
+}
+
+// BindLuaState 绑定虚拟机函数
+func (vm *GLVm) BindLuaState(state *lua.LState) {
+	state.SetGlobal("browser_reset", state.NewFunction(func(l *lua.LState) int {
+		vm.ResetBrowser()
+		return 0
+	}))
+	//
+	state.SetGlobal("browser_save", state.NewFunction(func(l *lua.LState) int {
+		//spiderCode := l.ToString(-5)
+		//siteName := l.ToString(-4)
+		//siteChannelName := l.ToString(-3)
+		//siteChannelUrl := l.ToString(-2)
+		/*table := l.ToTable(-1)
+		data := TableToMap(table)*/
+		//vm.S.Save(spiderCode, siteName, siteChannelName, siteChannelUrl, data)
+		return 0
+	}))
+}
+
+// findTabContext 根据标题、url找tab
+func (b *GLBrowser) findTabContext(tabTitle, tabUrl string, timeoutInt64 int64) (ctx context.Context, err error) {
+	if timeoutInt64 == 0 {
+		timeoutInt64 = 5000
+	}
+	timeout := time.Duration(timeoutInt64) * time.Millisecond
+	if tabTitle == "" && tabUrl == "" {
+		ctx, _ = context.WithTimeout(b.Ctx, timeout)
+		return ctx, nil
+	} else {
+		ts, err := chromedp.Targets(b.Ctx)
+		if err != nil {
+			return nil, err
+		}
+		for _, t := range ts {
+			if (tabTitle != "" && strings.Contains(t.Title, tabTitle)) || (tabUrl != "" && strings.Contains(t.URL, tabUrl)) {
+				// log.Printf("find tab param<title,url>: %s %s found %s %s", tabTitle, tabUrl,
+				// 	t.Title, t.URL)
+				newCtx, _ := chromedp.NewContext(b.Ctx, chromedp.WithTargetID(t.TargetID))
+				ctx, _ = context.WithTimeout(newCtx, timeout)
+				return ctx, nil
+			}
+		}
+	}
+	return nil, errors.New("can't find tab")
+}
+
+// CloseTabs 关闭页面
+func (b *GLBrowser) CloseTabs(tabTitle, tabUrl string, timeoutInt64 int64) (err error) {
+	if timeoutInt64 == 0 {
+		timeoutInt64 = 5
+	}
+	timeout := time.Duration(timeoutInt64) * time.Millisecond
+
+	ts, err := chromedp.Targets(b.Ctx)
+	if err != nil {
+		return err
+	}
+	for _, t := range ts {
+		if (tabTitle != "" && strings.Contains(t.Title, tabTitle)) || (tabUrl != "" && strings.Contains(t.URL, tabUrl)) {
+			newCtx, _ := chromedp.NewContext(b.Ctx, chromedp.WithTargetID(t.TargetID))
+			ctx, _ := context.WithTimeout(newCtx, timeout)
+			chromedp.Run(
+				ctx,
+				page.Close(),
+			)
+		}
+	}
+	return nil
+}
+
+// Navigate 导航到指定网址
+func (b *GLBrowser) Navigate(tabTitle string, tabUrl string, isNewTab bool, targetUrl string, timeout int64) (err error) {
+	ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
+	if err != nil {
+		return err
+	}
+	//新标签页
+	if isNewTab {
+		ctx, _ = chromedp.NewContext(ctx)
+	}
+	//
+	return chromedp.Run(ctx,
+		chromedp.Navigate(targetUrl))
+}
+
+// NavigateAndSaveRes 导航到指定网址,并保存请求资源,如图片等
+func (b *GLBrowser) NavigateAndSaveRes(tabTitle string, tabUrl string, timeout int64, isNewTab bool, targetUrl string, saveFileTypeList, save2dir string) (err error) {
+	ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
+	if err != nil {
+		return err
+	}
+	//新标签页
+	if isNewTab {
+		ctx, _ = chromedp.NewContext(ctx)
+	}
+	//
+	saveFileType := strings.Split(saveFileTypeList, " ")
+	isNeedRes := func(fileType string) bool {
+		for _, v := range saveFileType {
+			if strings.Contains(fileType, v) {
+				return true
+			}
+		}
+		return false
+	}
+	fnURL2FileName := func(requestURL string) string {
+		u, err := url.Parse(requestURL)
+		if err != nil {
+			return ""
+		}
+		_, filename := filepath.Split(u.Path)
+		return filename
+	}
+	var cache = map[network.RequestID]string{}
+	chromedp.ListenTarget(ctx, func(v interface{}) {
+		switch ev := v.(type) {
+		case *network.EventRequestWillBeSent: //准备下载
+			cache[ev.RequestID] = ev.Request.URL
+		case *network.EventResponseReceived: //检查回应头的contenttype
+			contentType, _ := ev.Response.Headers["Content-Type"].(string)
+			fmt.Println(contentType)
+			if !isNeedRes(contentType) {
+				delete(cache, ev.RequestID)
+			}
+		case *network.EventLoadingFinished: //下载完成
+			if uri, ok := cache[ev.RequestID]; ok {
+				filename := fnURL2FileName(uri)
+				fmt.Println("save2file", filename)
+				if filename != "" {
+					filePath := filepath.Join(save2dir, filename)
+					var buf []byte
+					if err := chromedp.Run(ctx, chromedp.ActionFunc(func(ctx context.Context) error {
+						var err error
+						buf, err = network.GetResponseBody(ev.RequestID).Do(ctx)
+						return err
+					})); err == nil {
+						os.WriteFile(filePath, buf, 0777)
+					} else {
+						fmt.Println(err.Error())
+					}
+				}
+			}
+		}
+	})
+
+	//
+	err = chromedp.Run(ctx,
+		chromedp.Navigate(targetUrl))
+	//下载存储
+	return err
+}
+
+// ExecuteJS 执行脚本
+func (b *GLBrowser) ExecuteJS(tabTitle, tabUrl, script string, ret interface{}, timeout int64) (err error) {
+	ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
+	if err != nil {
+		return err
+	}
+	return chromedp.Run(ctx,
+		chromedp.Evaluate(script, ret))
+}
+
+// Click 点击
+func (b *GLBrowser) Click(tabTitle, tabUrl, selector string, selectorType int, timeout int64) (err error) {
+	ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
+	if err != nil {
+		return err
+	}
+	var act chromedp.QueryAction
+	switch selectorType {
+	case selector_type_id:
+		act = chromedp.Click(selector, chromedp.ByID)
+	case selector_type_query:
+		act = chromedp.Click(selector, chromedp.ByQuery)
+	case selector_type_search:
+		act = chromedp.Click(selector, chromedp.BySearch)
+	case selector_type_jspath:
+		act = chromedp.Click(selector, chromedp.ByJSPath)
+	default:
+		act = chromedp.Click(selector, chromedp.ByQueryAll)
+	}
+
+	err = chromedp.Run(ctx,
+		act)
+	return err
+
+}
+
+// KeySend 键盘输入
+func (b *GLBrowser) KeySend(tabTitle, tabUrl, selector, sendStr string, selectorType int, timeout int64) (err error) {
+	ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
+	if err != nil {
+		return err
+	}
+	var act chromedp.QueryAction
+	switch selectorType {
+	case selector_type_id:
+		act = chromedp.SendKeys(selector, sendStr, chromedp.ByID)
+	case selector_type_query:
+		act = chromedp.SendKeys(selector, sendStr, chromedp.ByQuery)
+	case selector_type_search:
+		act = chromedp.SendKeys(selector, sendStr, chromedp.BySearch)
+	case selector_type_jspath:
+		act = chromedp.SendKeys(selector, sendStr, chromedp.ByJSPath)
+	default:
+		act = chromedp.SendKeys(selector, sendStr, chromedp.ByQueryAll)
+	}
+	return chromedp.Run(ctx,
+		act)
+}
+
+// WaitVisible 等待元素可见
+func (b *GLBrowser) WaitVisible(tabTitle, tabUrl, selector string, selectorType int, timeout int64) error {
+	ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
+	if err != nil {
+		return err
+	}
+	var act chromedp.QueryAction
+	switch selectorType {
+	case selector_type_id:
+		act = chromedp.WaitVisible(selector, chromedp.ByID)
+	case selector_type_query:
+		act = chromedp.WaitVisible(selector, chromedp.ByQuery)
+	case selector_type_search:
+		act = chromedp.WaitVisible(selector, chromedp.BySearch)
+	case selector_type_jspath:
+		act = chromedp.WaitVisible(selector, chromedp.ByJSPath)
+	default:
+		act = chromedp.WaitVisible(selector, chromedp.ByQueryAll)
+	}
+	return chromedp.Run(ctx,
+		act)
+}
+
+// Reset 重置浏览器(暂未实现)
+func (b *GLBrowser) Reset() {
+
+}
+
+// DownloadFile 只有在非headless模式下有效,与click方法其实是一致的
+func (b *GLBrowser) DownloadFile(tabTitle, tabUrl string, timeout int64, selector string, selectorType int, save2dir string) error {
+	ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
+	if err != nil {
+		return err
+	}
+	var act chromedp.QueryAction
+	switch selectorType {
+	case selector_type_id:
+		act = chromedp.Click(selector, chromedp.ByID)
+	case selector_type_query:
+		act = chromedp.Click(selector, chromedp.ByQuery)
+	case selector_type_search:
+		act = chromedp.Click(selector, chromedp.BySearch)
+	case selector_type_jspath:
+		act = chromedp.Click(selector, chromedp.ByJSPath)
+	default:
+		act = chromedp.Click(selector, chromedp.ByQueryAll)
+	}
+
+	return chromedp.Run(ctx,
+		browser.SetDownloadBehavior(browser.SetDownloadBehaviorBehaviorAllowAndName).WithDownloadPath(save2dir).WithEventsEnabled(true),
+		act)
+}
+
+// BindLuaState 将浏览器操作函数注册为lua全局函数
+func (b *GLBrowser) BindLuaState(s *lua.LState) {
+	//执行暂停
+	s.SetGlobal("browser_sleep", s.NewFunction(func(l *lua.LState) int {
+		fmt.Println("---browser_sleep---")
+		timeout := l.ToInt64(-1)
+		if timeout == 0 {
+			timeout = 5
+		}
+		time.Sleep(time.Duration(timeout) * time.Millisecond)
+		return 0
+	}))
+	//关闭tab页
+	s.SetGlobal("browser_closetabs", s.NewFunction(func(l *lua.LState) int {
+		fmt.Println("---browser_closetabs---")
+		timeout := l.ToInt64(-3)
+		tabTitle := l.ToString(-2)
+		tabUrl := l.ToString(-1)
+		if timeout == 0 {
+			timeout = 5
+		}
+		b.CloseTabs(tabTitle, tabUrl, timeout)
+		return 0
+	}))
+	//注册打开地址
+	s.SetGlobal("browser_navagite", s.NewFunction(func(l *lua.LState) int {
+		fmt.Println("---browser_navagite---")
+		tabTitle := l.ToString(-5)  //指定标签页title
+		tabUrl := l.ToString(-4)    //指定标签页url
+		isNewTab := l.ToBool(-3)    //是否打开新的标签页
+		timeout := l.ToInt64(-2)    //网页打开的超时时间
+		targetUrl := l.ToString(-1) //打开网页的链接
+		if err := b.Navigate(tabTitle, tabUrl, isNewTab, targetUrl, timeout); err != nil {
+			l.Push(lua.LString(err.Error()))
+		} else {
+			l.Push(lua.LString("ok"))
+		}
+		return 1
+	}))
+	//执行浏览器端js
+	s.SetGlobal("browser_executejs", s.NewFunction(func(l *lua.LState) int {
+		fmt.Println("---browser_executejs---")
+		tabTitle := l.ToString(-5)
+		tabUrl := l.ToString(-4)
+		timeout := l.ToInt64(-3)
+		returnType := l.ToInt(-2) //返回数据类型
+		script := l.ToString(-1)  //执行的js
+
+		switch returnType {
+		case execute_return_type_string: //返回string
+			var ret string
+			if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout); err == nil {
+				l.Push(lua.LString("ok"))
+				l.Push(lua.LString(ret))
+			} else {
+				l.Push(lua.LString("err"))
+				l.Push(lua.LString(err.Error()))
+			}
+		case execute_return_type_list: //返回list
+			var ret = make([]interface{}, 0, 0)
+			var tmp = make(map[string]interface{})
+			if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout); err == nil {
+				for i, v := range ret {
+					tmp[strconv.Itoa(i)] = v
+				}
+				l.Push(lua.LString("ok"))
+				l.Push(MapToTable(tmp))
+			} else {
+				l.Push(lua.LString("err"))
+				l.Push(lua.LString(err.Error()))
+			}
+		case execute_return_type_table: //返回table
+			var ret = make(map[string]interface{})
+			if err := b.ExecuteJS(tabTitle, tabUrl, script, &ret, timeout); err == nil {
+				l.Push(lua.LString("ok"))
+				l.Push(MapToTable(ret))
+			} else {
+				l.Push(lua.LString("err"))
+				l.Push(lua.LString(err.Error()))
+			}
+		}
+		return 2
+	}))
+	//按键
+	s.SetGlobal("browser_keysend", s.NewFunction(func(l *lua.LState) int {
+		fmt.Println("---browser_keysend---")
+		tabTitle := l.ToString(-6)
+		tabUrl := l.ToString(-5)
+		timeout := l.ToInt64(-4)
+		words := l.ToString(-3)
+		selectorType := l.ToInt(-2)
+		selector := l.ToString(-1)
+		fmt.Println(selector, words, selectorType, timeout)
+		err := b.KeySend(tabTitle, tabUrl, selector, words, selectorType, timeout)
+		if err != nil {
+			l.Push(lua.LString(err.Error()))
+		} else {
+			l.Push(lua.LString("ok"))
+		}
+		return 1
+	}))
+	//点击
+	s.SetGlobal("browser_click", s.NewFunction(func(l *lua.LState) int {
+		fmt.Println("---browser_click---")
+		tabTitle := l.ToString(-5)
+		tabUrl := l.ToString(-4)
+		timeout := l.ToInt64(-3)
+		selectorType := l.ToInt(-2)
+		selector := l.ToString(-1)
+
+		err := b.Click(tabTitle, tabUrl, selector, selectorType, timeout)
+		if err != nil {
+			l.Push(lua.LString(err.Error()))
+		} else {
+			l.Push(lua.LString("ok"))
+		}
+		return 1
+	}))
+
+	s.SetGlobal("browser_waitvisible", s.NewFunction(func(l *lua.LState) int {
+		fmt.Println("---browser_waitvisible---")
+		tabTitle := l.ToString(-5)
+		tabUrl := l.ToString(-4)
+		timeout := l.ToInt64(-3)
+		selectorType := l.ToInt(-2) //选择器类型
+		selector := l.ToString(-1)  //选择器
+
+		err := b.WaitVisible(tabTitle, tabUrl, selector, selectorType, timeout)
+		if err != nil {
+			l.Push(lua.LString(err.Error()))
+		} else {
+			l.Push(lua.LString("ok"))
+		}
+		return 1
+	}))
+
+	//点击下载文件
+	s.SetGlobal("browser_downloadfile", s.NewFunction(func(l *lua.LState) int {
+		tabTitle := l.ToString(-6)
+		tabUrl := l.ToString(-5)
+		timeout := l.ToInt64(-4)
+		selectorType := l.ToInt(-3)
+		selector := l.ToString(-2)
+		save2dir := l.ToString(-1)
+
+		err := b.DownloadFile(tabTitle, tabUrl, timeout, selector, selectorType, save2dir)
+		if err != nil {
+			l.Push(lua.LString(err.Error()))
+		} else {
+			l.Push(lua.LString("ok"))
+		}
+		return 1
+	}))
+
+	//注册打开地址并保存请求资源
+	s.SetGlobal("browser_navagite_download_res", s.NewFunction(func(l *lua.LState) int {
+		tabTitle := l.ToString(-7)
+		tabUrl := l.ToString(-6)
+		timeout := l.ToInt64(-5)
+		isNewTab := l.ToBool(-4)
+		targetUrl := l.ToString(-3)
+		saveFileTypeList := l.ToString(-2)
+		savedir := l.ToString(-1)
+		if err := b.NavigateAndSaveRes(tabTitle, tabUrl, timeout, isNewTab, targetUrl, saveFileTypeList, savedir); err != nil {
+			l.Push(lua.LString(err.Error()))
+		} else {
+			l.Push(lua.LString("ok"))
+		}
+		return 1
+	}))
+}

+ 125 - 0
backend/script/util.go

@@ -0,0 +1,125 @@
+package script
+
+import (
+	"fmt"
+	lua "github.com/yuin/gopher-lua"
+	"log"
+	"time"
+)
+
+func Catch() {
+	if err := recover(); err != nil {
+		log.Println(err)
+	}
+}
+
+// MapToTable converts a Go map to a lua table
+func MapToTable(m map[string]interface{}) *lua.LTable {
+	// Main table pointer
+	resultTable := &lua.LTable{}
+
+	// Loop map
+	for key, element := range m {
+
+		switch element.(type) {
+		case float64:
+			resultTable.RawSetString(key, lua.LNumber(element.(float64)))
+		case int64:
+			resultTable.RawSetString(key, lua.LNumber(element.(int64)))
+		case string:
+			resultTable.RawSetString(key, lua.LString(element.(string)))
+		case bool:
+			resultTable.RawSetString(key, lua.LBool(element.(bool)))
+		case []byte:
+			resultTable.RawSetString(key, lua.LString(string(element.([]byte))))
+		case map[string]interface{}:
+
+			// Get table from map
+			tble := MapToTable(element.(map[string]interface{}))
+
+			resultTable.RawSetString(key, tble)
+
+		case time.Time:
+			resultTable.RawSetString(key, lua.LNumber(element.(time.Time).Unix()))
+
+		case []map[string]interface{}:
+
+			// Create slice table
+			sliceTable := &lua.LTable{}
+
+			// Loop element
+			for _, s := range element.([]map[string]interface{}) {
+
+				// Get table from map
+				tble := MapToTable(s)
+
+				sliceTable.Append(tble)
+			}
+
+			// Set slice table
+			resultTable.RawSetString(key, sliceTable)
+
+		case []interface{}:
+
+			// Create slice table
+			sliceTable := &lua.LTable{}
+
+			// Loop interface slice
+			for _, s := range element.([]interface{}) {
+
+				// Switch interface type
+				switch s.(type) {
+				case map[string]interface{}:
+
+					// Convert map to table
+					t := MapToTable(s.(map[string]interface{}))
+
+					// Append result
+					sliceTable.Append(t)
+
+				case float64:
+
+					// Append result as number
+					sliceTable.Append(lua.LNumber(s.(float64)))
+
+				case string:
+
+					// Append result as string
+					sliceTable.Append(lua.LString(s.(string)))
+
+				case bool:
+
+					// Append result as bool
+					sliceTable.Append(lua.LBool(s.(bool)))
+				}
+			}
+
+			// Append to main table
+			resultTable.RawSetString(key, sliceTable)
+		}
+	}
+
+	return resultTable
+}
+
+// TableToMap converts a Lua table to a Go map
+func TableToMap(t *lua.LTable) map[string]interface{} {
+	ret := make(map[string]interface{})
+	t.ForEach(func(k, v lua.LValue) {
+		key := fmt.Sprint(k)
+		if val, ok := v.(*lua.LTable); ok {
+			ret[key] = TableToMap(val)
+		} else {
+			if val, ok := v.(lua.LString); ok {
+				ret[key] = string(val)
+			} else if val, ok := v.(lua.LNumber); ok {
+				ret[key] = int64(val)
+			} else if val, ok := v.(lua.LBool); ok {
+				ret[key] = bool(val)
+			} else {
+				ret[key] = fmt.Sprint(v)
+			}
+		}
+	})
+	return ret
+}

+ 2 - 2
backend/vm/check.go

@@ -16,8 +16,8 @@ func (vm *VM) VerifySpiderConfig(sc *be.SpiderConfig) (*be.SpiderConfigVerifyRes
 	verifyResult := list.New()
 	be.DataResults[sc.Code] = verifyResult
 	ret := &be.SpiderConfigVerifyResult{false, false, false, false, false, false, false}
-	_, baseCancelFn, _, _, ctx, incCancelFn := be.NewBrowser(false, false, false)   //列表页使用
-	_, baseCancelFn2, _, _, ctx2, incCancelFn2 := be.NewBrowser(false, true, false) //详情页使用
+	_, baseCancelFn, _, _, ctx, incCancelFn := be.NewBrowser(false, false, false, sc.Href)   //列表页使用
+	_, baseCancelFn2, _, _, ctx2, incCancelFn2 := be.NewBrowser(false, true, false, sc.Href) //详情页使用
 	defer func() {
 		incCancelFn2()
 		baseCancelFn2()

+ 2 - 2
backend/vm/jobs.go

@@ -56,8 +56,8 @@ func (vm *VM) RunJob(code string) {
 	vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code, Act: be.JOB_RUNNING_EVENT_DEBUG, Msg: "加载作业完成"})
 	no := 1
 	//加载参数
-	_, baseCancelFn, _, _, ctx, incCancelFn := be.NewBrowser(true, false, false)    //列表页使用
-	_, baseCancelFn2, _, _, ctx2, incCancelFn2 := be.NewBrowser(true, false, false) //详情页使用
+	_, baseCancelFn, _, _, ctx, incCancelFn := be.NewBrowser(true, false, false, "https://")    //列表页使用
+	_, baseCancelFn2, _, _, ctx2, incCancelFn2 := be.NewBrowser(true, false, false, "https://") //详情页使用
 	defer func() {
 		job.State = 0
 		job.Progress = 0

+ 3 - 3
backend/vm/single.go

@@ -30,7 +30,7 @@ func (vm *VM) RunSpiderTmp(url string, maxPages int, listDealy, trunPageDelay, c
 	if url != "" {
 		sc.Href = url
 	}
-	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, proxyServe)
+	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, proxyServe, sc.Href)
 	qu.Debug("1浏览器打开", *sc)
 	vm.dnf.Dispatch("debug_event", "1 浏览器打开")
 	defer func() {
@@ -116,7 +116,7 @@ func (vm *VM) RunSpider(url string, maxPages int, listDealy int64, contentDelay
 	if url != "" {
 		sc.Href = url
 	}
-	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, proxyServe)
+	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, proxyServe, sc.Href)
 	qu.Debug("1浏览器打开", *sc)
 	vm.dnf.Dispatch("debug_event", "1 浏览器打开")
 	defer func() {
@@ -336,7 +336,7 @@ func (vm *VM) InitPageTmp(ctx context.Context, timeout int) bool {
 func (vm *VM) CountYestodayArts(url string, listDealy int64, trunPageDelay int64,
 	headless bool, showImage bool, exit chan bool, currentSpiderConfig *be.SpiderConfig) (count int) {
 	sc := be.MergeSpiderConfig(currentSpiderConfig, &be.SpiderConfig{Href: url})
-	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, false)
+	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, false, sc.Href)
 	qu.Debug("1浏览器打开")
 	vm.dnf.Dispatch("debug_event", "1 浏览器打开")
 	defer func() {

+ 2 - 2
backend/vm/worker.go

@@ -23,7 +23,7 @@ func (w *Worker) Destory() {
 
 // NewWorker
 func NewWorker(headless bool, showImage bool, proxyServe bool, contentDelay int64, js string, vm *VM) *Worker {
-	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, proxyServe)
+	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, proxyServe, "https://")
 	return &Worker{
 		baseCancel:   baseCancel,
 		incCancel:    cancel,
@@ -76,7 +76,7 @@ func (vm *VM) RunSpiderMulThreads(url string, maxPages int, listDealy int64, tru
 	if url != "" {
 		sc.Href = url
 	}
-	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, proxyServe)
+	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, proxyServe, sc.Href)
 	qu.Debug("1浏览器打开")
 	vm.dnf.Dispatch("debug_event", "1 浏览器打开")
 	defer func() {

+ 3 - 0
main.go

@@ -5,6 +5,7 @@ import (
 	"embed"
 	be "spider_creator/backend"
 	bdb "spider_creator/backend/db"
+	"spider_creator/backend/script"
 	bvm "spider_creator/backend/vm"
 	bws "spider_creator/backend/webservice"
 
@@ -20,9 +21,11 @@ var (
 	db                   *bdb.SpiderDb
 	exitCh               chan bool
 	baseDir, attachesDir string           = ".", ""
+	qlmDir               string           = ""
 	currentSpiderConfig  *be.SpiderConfig = new(be.SpiderConfig)
 	currentResults                        = list.New() //b.ResultItems = make(b.ResultItems, 0)
 	vm                   *bvm.VM
+	qlmVm                *script.GLVm
 	ws                   *bws.WebService
 )
 

+ 18 - 0
qianlima.go

@@ -0,0 +1,18 @@
+package main
+
+// QlmListDownload 千里马列表页数据下载
+func (a *App) QlmListDownload() {
+	//读取列表页lua脚本
+	qlmVm.LoadScript("list")
+}
+
+// QlmDetailDownload 千里马详情页数据下载
+func (a *App) QlmDetailDownload() {
+	//读取详情页lua脚本
+	qlmVm.LoadScript("detail")
+}
+
+// QlmDataSave 采集数据保存
+func QlmDataSave() {
+
+}

+ 61 - 5
server.go

@@ -12,10 +12,9 @@ import (
 	"time"
 )
 
-//const HREF = "http://127.0.0.1:8091/%s" //线下测试环境
-
-const HREF = "http://visualizeld.spdata.jianyu360.com/%s" //正式库
-//const HREF = "http://visualize.spdata.jianyu360.com/%s" //临时库
+// const HREF = "http://127.0.0.1:8091/%s" //线下测试环境
+// const HREF = "http://visualizeld.spdata.jianyu360.com/%s" //正式库
+const HREF = "http://visualize.spdata.jianyu360.com/%s" //临时库
 
 type Result struct {
 	Msg  string `json:"msg"`
@@ -78,7 +77,7 @@ func (a *App) ServerActionUpdateCode(param map[string]interface{}) *Result {
 	return r
 }
 
-// ServerActionUpdateCode 爬虫状态更新
+// ServerActionUpdateCodeState 爬虫状态更新
 func (a *App) ServerActionUpdateCodeState(param map[string]interface{}) *Result {
 	qu.Debug("param---", param)
 	/*
@@ -119,6 +118,63 @@ func (a *App) ServerActionUpdateCodeState(param map[string]interface{}) *Result
 	return r
 }
 
+// ServerActionQlmNewRecord 新增千里马采集记录
+func (a *App) ServerActionQlmNewRecord(param map[string]interface{}) *Result {
+	qu.Debug("param---", param)
+	r := &Result{}
+	//前期校验
+	if User != nil {
+		getResult(map[string]interface{}{"param": param, "user": User}, r, "newRecord")
+	} else {
+		r.Msg = "用户登录异常,请重新登录!"
+		qu.Debug(r.Msg)
+	}
+	return r
+}
+
+// ServerActionQlmRemoveRepeat 千里马数据去重
+func (a *App) ServerActionQlmRemoveRepeat(param map[string]interface{}) *Result {
+	qu.Debug("param---", param)
+	r := &Result{}
+	//前期校验
+	if User != nil {
+		getResult(map[string]interface{}{"param": param}, r, "removeRepeat")
+	} else {
+		r.Msg = "用户登录异常,请重新登录!"
+		qu.Debug(r.Msg)
+	}
+	return r
+}
+
+// ServerActionQlmPushData 千里马数据推送
+func (a *App) ServerActionQlmPushData(param map[string]interface{}) *Result {
+	qu.Debug("param---", param)
+	r := &Result{}
+	//前期校验
+	if User != nil {
+		getResult(map[string]interface{}{"param": param}, r, "pushData")
+	} else {
+		r.Msg = "用户登录异常,请重新登录!"
+		qu.Debug(r.Msg)
+	}
+	return r
+}
+
+// ServerActionQlmClearData 千里马数据清理
+func (a *App) ServerActionQlmClearData(param map[string]interface{}) *Result {
+	qu.Debug("param---", param)
+	r := &Result{}
+	//前期校验
+	if User != nil {
+		qu.Debug("param---", param)
+		getResult(map[string]interface{}{"param": param}, r, "clearData")
+	} else {
+		r.Msg = "用户登录异常,请重新登录!"
+		qu.Debug(r.Msg)
+	}
+	return r
+}
+
 // 格式化User对象
 func formatUser(tmp map[string]interface{}) {
 	v := reflect.ValueOf(User)