123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227 |
- /**
- * Virtual machine (VM): runs Lua scripts against a chromedp-controlled browser.
- */
- package main
- import (
- "context"
- "fmt"
- "log"
- "strings"
- "github.com/chromedp/chromedp"
- "github.com/yuin/gopher-lua"
- "github.com/yuin/gopher-lua/parse"
- )
// Run modes stored in VM.RunMode; they decide which kind of browser
// ResetBrowser creates.
const (
	// run_on_device_remote attaches to a browser reachable at VM.WsAddr.
	run_on_device_remote = iota
	// run_on_device_local starts a browser through chromedp's exec allocator.
	run_on_device_local
)
- type (
- //虚拟机
- VM struct {
- WsAddr string
- Headless bool
- ShowImage bool
- ProxyAddr string
- RunMode int
- DownloadPath string
- B *Browser
- S Storage
- }
- //浏览器,(不用之前封装的,这个更轻量)
- Browser struct {
- Ctx context.Context
- CancelFn context.CancelFunc
- }
- )
- // NewVM
- func NewRemoteVM(wsAddr string, s Storage) *VM {
- return &VM{WsAddr: wsAddr, RunMode: run_on_device_remote, S: s}
- }
- // NewVM
- func NewLocalVM(headless bool, showImage bool, proxyAddr, downloadPath string, s Storage) *VM {
- return &VM{Headless: headless, ProxyAddr: proxyAddr, ShowImage: showImage,
- RunMode: run_on_device_local,
- DownloadPath: downloadPath,
- S: s}
- }
- // Quit
- func (b *Browser) Quit() {
- if b != nil && b.CancelFn != nil {
- b.CancelFn()
- b.Ctx = nil
- b.CancelFn = nil
- }
- }
- // createRemoteBrowser 创建远程浏览器
- func createRemoteBrowser(wsAddr string) *Browser {
- allocCtx, cancelFn := chromedp.NewRemoteAllocator(context.TODO(),
- wsAddr)
- incCtx, _ := chromedp.NewContext(allocCtx)
- return &Browser{
- incCtx, cancelFn,
- }
- }
- // createLocalBrowser 创建本地浏览器
- func createLocalBrowser(headless,
- showImage bool,
- proxyAddr, downloadPath string) *Browser {
- baseCtx, _ := chromedp.NewContext(context.Background())
- chromeOptions := []chromedp.ExecAllocatorOption{
- chromedp.NoFirstRun,
- chromedp.NoDefaultBrowserCheck,
- chromedp.DisableGPU,
- chromedp.NoSandbox,
- chromedp.WindowSize(1920, 1080),
- chromedp.Flag("enable-automation", false), // 防止监测webdriver
- chromedp.Flag("disable-blink-features", "AutomationControlled"), //禁用 blink 特征 作者:知识货栈 https://www.bilibili.com/read/cv24371371/ 出处:bilibili
- chromedp.Flag("lang", "zh-CN"),
- chromedp.Flag("mixed-forms-disable-autofill", false), //从https转http不再检查
- chromedp.Flag("ignore-certificate-errors", true), //忽略错误
- chromedp.Flag("ignore-urlfetcher-cert-requests", true),
- chromedp.Flag("enable-automation", false), // 防止监测webdriver
- chromedp.Flag("disable-blink-features", "AutomationControlled"), //禁用 blink 特征
- chromedp.Flag("force-dev-mode-highlighting", true),
- chromedp.Flag("disable-extensions", false), //是否禁用扩展
- chromedp.Flag("headless", headless),
- chromedp.Flag("user-agent", "Chrome 9 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/93.0.4577.58 Safari/537.36 Edg/93.0.961.33"),
- chromedp.Flag("disable-keep-alive", true),
- chromedp.Flag("disable-dev-shm-usage", false),
- chromedp.Flag("default-browser-check", false),
- chromedp.Flag("disable-web-security", true), //禁用网络安全标志
- chromedp.Flag("mute-audio", false),
- chromedp.Flag("https-upgrades", "disabled"),
- chromedp.Flag("accept-language", `zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-TW;q=0.6`),
- //chromedp.Flag("blink-settings", "imagesEnabled=true"),
- //chromedp.Flag("incognito", true), //隐私模式
- chromedp.Flag("disable-cache", true), //不用缓存
- }
- if proxyAddr != "" {
- chromeOptions = append(chromeOptions,
- chromedp.ProxyServer(fmt.Sprintf("socks5://%s", proxyAddr)))
- }
- if downloadPath != "" {
- chromeOptions = append(chromeOptions,
- chromedp.Flag("download-path", downloadPath))
- }
- if showImage {
- chromeOptions = append(chromeOptions,
- chromedp.Flag("blink-settings", "imagesEnabled=true"),
- )
- } else {
- chromeOptions = append(chromeOptions,
- chromedp.Flag("blink-settings", "imagesEnabled=false"),
- )
- }
- allocCtx, _ := chromedp.NewExecAllocator(baseCtx, chromeOptions...)
- // 创建一个浏览器实例
- incCtx, incCancelFn := chromedp.NewContext(allocCtx,
- chromedp.WithLogf(log.Printf))
- return &Browser{
- incCtx, incCancelFn,
- }
- }
- // 重置浏览器
- func (vm *VM) ResetBrowser() {
- if vm.B != nil && vm.B.CancelFn != nil {
- vm.B.CancelFn()
- vm.B.Ctx = nil
- vm.B.CancelFn = nil
- }
- var b *Browser
- if vm.RunMode == run_on_device_local {
- b = createLocalBrowser(vm.Headless, vm.ShowImage, vm.ProxyAddr, vm.DownloadPath)
- } else {
- b = createRemoteBrowser(vm.WsAddr)
- }
- if vm.B == nil {
- vm.B = b
- } else {
- vm.B.Ctx, vm.B.CancelFn = b.Ctx, b.CancelFn
- }
- }
- // BindLuaState 绑定虚拟机函数
- func (vm *VM) BindLuaState(state *lua.LState) {
- state.SetGlobal("browser_reset", state.NewFunction(func(l *lua.LState) int {
- vm.ResetBrowser()
- return 0
- }))
- state.SetGlobal("browser_save", state.NewFunction(func(l *lua.LState) int {
- spiderCode := l.ToString(-5)
- siteName := l.ToString(-4)
- siteChannelName := l.ToString(-3)
- siteChannelUrl := l.ToString(-2)
- table := l.ToTable(-1)
- data := TableToMap(table)
- vm.S.Save(spiderCode, siteName, siteChannelName, siteChannelUrl, data)
- return 0
- }))
- state.SetGlobal("browser_url_last_segs", state.NewFunction(func(l *lua.LState) int {
- segs := l.ToInt(-2)
- href := l.ToString(-1)
- if segs == 0 {
- segs = 2
- }
- s := urlLastSegs(href, segs)
- l.Push(lua.LString(s))
- return 1
- }))
- //最多传10个string参数,不支持其他类型
- state.SetGlobal("browser_log", state.NewFunction(func(l *lua.LState) int {
- params := []string{}
- for i := -10; i < 0; i++ {
- p := l.ToString(i)
- if p != "" {
- params = append(params, p)
- }
- }
- if sl != nil {
- sl.Log(params...)
- }
- return 0
- }))
- }
- // runScript 执行lua代码
- func (vm *VM) RunScript(script string) error {
- defer Catch()
- var state *lua.LState = lua.NewState()
- defer state.Close()
- //方法绑定
- vm.ResetBrowser() //先创建浏览器对象
- vm.BindLuaState(state)
- vm.B.BindLuaState(state)
- defer func() {
- if vm.B != nil {
- vm.B.Quit()
- }
- }()
- reader := strings.NewReader(script)
- chunk, err := parse.Parse(reader, "code")
- if err != nil {
- return err
- }
- proto, err := lua.Compile(chunk, script)
- if err != nil {
- return err
- }
- lfunc := state.NewFunctionFromProto(proto)
- state.Push(lfunc)
- state.Call(0, 0)
- return nil
- }
|