package backend

import (
	"context"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"math/rand"
	"net/http"
	"strings"
	"time"

	"github.com/chromedp/cdproto/fetch"
	"github.com/chromedp/cdproto/network"
	"github.com/chromedp/cdproto/page"
	"github.com/chromedp/chromedp"
)

var (
	// useragent is the pool of User-Agent strings NewBrowser picks from at
	// random when launching a browser.
	//
	// NOTE(review): many entries start with a label such as "Chrome: " or
	// "Safari 11 " in front of the actual "Mozilla/..." value; they are passed
	// to the browser verbatim. Confirm whether those prefixes are intended.
	useragent = []string{
		"Chrome: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
		"Firefox: Mozilla/5.0 (Windows NT 6.3; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0",
		//"Safari: Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_5 like Mac OS X) AppleWebKit/604.5.6 (KHTML, like Gecko) Version/11.0 Mobile/15D60 Safari/604.1",
		"MacOSX: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14",
		"Mozilla/5.0(Macintosh;U;IntelMacOSX10_6_8;en-us)AppleWebKit/534.50(KHTML,likeGecko)Version/5.1Safari/534.50",
		"Mozilla/5.0(Windows;U;WindowsNT6.1;en-us)AppleWebKit/534.50(KHTML,likeGecko)Version/5.1Safari/534.50",
		"Mozilla/5.0(Macintosh;IntelMacOSX10.6;rv:2.0.1)Gecko/20100101Firefox/4.0.1",
		"Mozilla/5.0(WindowsNT6.1;rv:2.0.1)Gecko/20100101Firefox/4.0.1",
		"Mozilla/5.0(Macintosh;IntelMacOSX10_7_0)AppleWebKit/535.11(KHTML,likeGecko)Chrome/17.0.963.56Safari/535.11",
		"Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;Trident/4.0;SE2.XMetaSr1.0;SE2.XMetaSr1.0;.NETCLR2.0.50727;SE2.XMetaSr1.0)",
		"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.70 Safari/537.36",
		"Chrome 9 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
		"Safari Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
		"Safari Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
		"Safari 11 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15 QQBrowserLite/1.3.0",
		"Chrome 9 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
		"Chrome 59 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
		"Chrome 9 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36",
		"Safari 11 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0.1 Safari/604.3.5",
		"Firefox 9 Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:93.0) Gecko/20100101 Firefox/93.0",
		"Safari Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
		"Chrome 8 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
		"Chrome Mozilla/5.0 (X11; U; U; Linux x86_64; zh-my) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 Puffin/8.3.1.41624AP",
		"Opera 28 Mozilla/5.0 (Linux; BRAVIA 4K 2015 Build/LMY48E.S265) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36 OPR/28.0.1754.0",
		"Safari Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36 HeyTapBrowser/40.7.29.1",
		"Chrome 9 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.58 Safari/537.36 Edg/93.0.961.33",
		"Chrome 9 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/15.0 Chrome/90.0.4430.210 Safari/537.36",
		"Chrome 9 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
		"Chrome Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
		"Microsoft Edge Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134",
		"Chrome 8 Mozilla/5.0 (Windows NT 10.0; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
		"Chrome 8 Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
		"Chrome 9 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
		"Chrome 8 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36",
		"Chrome 9 Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
		"Chrome Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
		"Firefox 7 Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0",
		"Chrome 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
		"Internet Explorer 11 Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; LCJB; rv:11.0) like Gecko",
		"Chrome 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36",
		"Firefox 36 Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0",
		"Chrome Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3870.400 QQBrowser/10.8.4405.400",
		"Chrome 58 Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0",
		"Firefox 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0",
		"Chrome 8 Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
		"Chrome 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Edg/94.0.992.38",
	}
)

// NewBrowser builds the chromedp contexts used to drive a Chrome instance.
//
// Parameters:
//   - headless: value passed to the "headless" launch flag.
//   - showImage: selects "imagesEnabled=true" or "imagesEnabled=false" for the
//     "blink-settings" launch flag.
//   - proxyServe: when true, a proxy address is fetched via GetProxyAddr and,
//     if one is returned, passed to the browser as its proxy server.
//   - baseUrl: when it starts with "https", the "ignore-certificate-errors"
//     launch flag is added.
//
// It returns three context/cancel pairs, in this order: the root chromedp
// context, the exec-allocator context carrying the launch flags, and a
// browser context derived from that allocator. The caller owns all three
// cancel functions.
//
// Errors raised while enabling the Fetch domain or installing the
// navigator.webdriver override are printed, not returned, because the
// signature has no error result.
func NewBrowser(headless bool, showImage bool, proxyServe bool, baseUrl string) (context.Context, context.CancelFunc, context.Context, context.CancelFunc, context.Context, context.CancelFunc) {
	ignoreCertificateErrors := strings.HasPrefix(baseUrl, "https")

	ctx, cancelFn := chromedp.NewContext(context.Background())

	chromeOptions := append(chromedp.DefaultExecAllocatorOptions[:],
		chromedp.NoDefaultBrowserCheck,            // do not check for the default browser
		chromedp.Flag("enable-automation", false), // avoid webdriver detection
		chromedp.Flag("force-dev-mode-highlighting", true),
		// -- begin: settings meant to stop HTTP being upgraded to HTTPS
		chromedp.Flag("disable-extensions", true),                       // disable extensions
		chromedp.Flag("disable-blink-features", "AutomationControlled"), // disable this blink feature
		// Disable selected security features. chromedp.Flag keeps a single
		// value per flag name, so repeated "disable-features" calls overwrite
		// each other; the features are therefore passed as one
		// comma-separated list.
		chromedp.Flag("disable-features", "SSLForcedForSafety,SSLForced,AutoupgradeToHTTPS,ImprovedHTTPSUpgrade"),
		chromedp.Flag("ssl-protocol", "any"),
		chromedp.Flag("ignore-certificate-errors-spki-list", true),
		// -- end: settings meant to stop HTTP being upgraded to HTTPS
		chromedp.Flag("headless", headless),
		// In the end the user agent has to be set here. Pick from the whole
		// pool rather than a hard-coded prefix of it.
		chromedp.Flag("user-agent", useragent[rand.Intn(len(useragent))]),
		chromedp.Flag("disable-keep-alive", true),
		chromedp.Flag("disable-gpu", true),
		chromedp.Flag("no-sandbox", true),
		chromedp.Flag("disable-dev-shm-usage", "false"),
		chromedp.Flag("default-browser-check", "false"),
		chromedp.Flag("mute-audio", "false"),
		chromedp.Flag("disable-web-security", true),
		chromedp.Flag("accept-language", `zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-TW;q=0.6`),
	)

	if proxyServe {
		// Previous variant, kept for reference:
		// chromedp.ProxyServer(fmt.Sprintf("socks5://%s", proxyServe)),
		if addr := GetProxyAddr(); addr != "" {
			chromeOptions = append(chromeOptions, chromedp.ProxyServer(addr))
		} else {
			// GetProxyAddr returns "" on every failure path; do not hand an
			// empty proxy address to the browser.
			fmt.Println("get proxy addr empty, browser starts without proxy")
		}
	}

	if ignoreCertificateErrors {
		chromeOptions = append(chromeOptions,
			chromedp.Flag("ignore-certificate-errors", true),
		)
	}

	if showImage {
		chromeOptions = append(chromeOptions,
			chromedp.Flag("blink-settings", "imagesEnabled=true"),
		)
	} else {
		chromeOptions = append(chromeOptions,
			chromedp.Flag("blink-settings", "imagesEnabled=false"),
		)
	}

	allocCtx, allocCancelFn := chromedp.NewExecAllocator(ctx, chromeOptions...)
	// Create a browser instance context on top of the configured allocator.
	incCtx, incCancelFn := chromedp.NewContext(allocCtx, chromedp.WithLogf(nil))

	// TODO: set a browser-level network load timeout.
	// Listen for network requests and arm a timeout for each of them.
	//
	// NOTE(review): the listener and the init script below are attached to
	// ctx, which is created before the customized allocator, while the launch
	// flags apply to allocCtx/incCtx. Confirm which context callers actually
	// navigate with.
	chromedp.ListenTarget(ctx, func(ev interface{}) {
		sent, ok := ev.(*network.EventRequestWillBeSent)
		if !ok {
			return
		}

		// Timeout for this request, read from the package configuration.
		timeout := time.Duration(Cfg.BrowserLoadResourceTimeout) * time.Second

		// TODO: decide which resource types should be subject to the timeout
		// abort (by default only resource-type requests). Configuring that is
		// cumbersome, so for now it applies globally to every request type.
		// This is a different concept from chromedp's own timeout contexts.

		// Start a timer and fail the request once it expires. The goroutine
		// also ends when ctx is cancelled so it cannot outlive the browser.
		go func(id network.RequestID) {
			timer := time.NewTimer(timeout)
			defer timer.Stop()

			select {
			case <-ctx.Done():
				return
			case <-timer.C:
				// NOTE(review): a network.RequestID is converted directly
				// into a fetch.RequestID and no fetch.EventRequestPaused
				// handler is registered after fetch.Enable(); verify against
				// the CDP Fetch domain that this aborts the intended request.
				fid := fetch.RequestID(id)
				// Best effort, as in the original: the result is deliberately
				// ignored because the request may already have completed.
				_ = fetch.FailRequest(fid, network.ErrorReasonTimedOut).Do(ctx)
			}
		}(sent.RequestID)
	})

	err := chromedp.Run(ctx,
		fetch.Enable(),
		chromedp.ActionFunc(func(cxt context.Context) error {
			// Make navigator.webdriver report false on every new document.
			_, err := page.AddScriptToEvaluateOnNewDocument("Object.defineProperty(navigator, 'webdriver', { get: () => false, });").Do(cxt)
			return err
		}),
	)
	if err != nil {
		fmt.Println("init browser err:", err)
	}

	return ctx, cancelFn, allocCtx, allocCancelFn, incCtx, incCancelFn
}

// GetProxyAddr asks the proxy service for a proxy address and returns it.
//
// The response body is decoded as JSON; the string under data.http is
// returned if present, otherwise the string under data.https. Any failure
// (request construction, transport error, unreadable body, invalid JSON,
// missing fields) yields the empty string, with the error printed where one
// is available.
func GetProxyAddr() string {
	// Upper bound on the whole round trip so that a stalled proxy service
	// cannot block browser start-up indefinitely.
	const fetchTimeout = 10 * time.Second

	proxyAddr := "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch"
	// NOTE(review): the Authorization header value is the same string as the
	// service URL; confirm this is the intended credential.
	proxyAuthor := "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch"

	// Build the proxy lookup request.
	req, err := http.NewRequest(http.MethodGet, proxyAddr, nil)
	if err != nil {
		fmt.Println("get proxy request err:", err)
		return ""
	}
	// Attach the authorization header.
	req.Header.Add("Authorization", proxyAuthor)

	// Send the request.
	client := http.Client{Timeout: fetchTimeout}
	resp, err := client.Do(req)
	if err != nil {
		fmt.Println("get proxy client err:", err)
		return ""
	}
	defer resp.Body.Close()

	bodyByte, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("get proxy read body err:", err)
		return ""
	}

	tmp := map[string]interface{}{}
	if err := json.Unmarshal(bodyByte, &tmp); err != nil {
		fmt.Println("get proxy unmarshal err:", err)
		return ""
	}

	data, ok := tmp["data"].(map[string]interface{})
	if !ok || len(data) == 0 {
		return ""
	}
	if httpProxy, ok := data["http"].(string); ok {
		return httpProxy
	}
	if httpsProxy, ok := data["https"].(string); ok {
		return httpsProxy
	}
	return ""
}