|
@@ -4,13 +4,15 @@ import (
|
|
"context"
|
|
"context"
|
|
"encoding/json"
|
|
"encoding/json"
|
|
"fmt"
|
|
"fmt"
|
|
- "github.com/chromedp/cdproto/fetch"
|
|
|
|
- "github.com/chromedp/cdproto/network"
|
|
|
|
"io/ioutil"
|
|
"io/ioutil"
|
|
"math/rand"
|
|
"math/rand"
|
|
"net/http"
|
|
"net/http"
|
|
"strings"
|
|
"strings"
|
|
- "time"
|
|
|
|
|
|
+
|
|
|
|
+ "github.com/chromedp/cdproto/cdp"
|
|
|
|
+ "github.com/chromedp/cdproto/network"
|
|
|
|
+
|
|
|
|
+ "github.com/chromedp/cdproto/fetch"
|
|
|
|
|
|
"github.com/chromedp/cdproto/page"
|
|
"github.com/chromedp/cdproto/page"
|
|
|
|
|
|
@@ -28,7 +30,7 @@ var (
|
|
"Mozilla/5.0(Macintosh;IntelMacOSX10.6;rv:2.0.1)Gecko/20100101Firefox/4.0.1",
|
|
"Mozilla/5.0(Macintosh;IntelMacOSX10.6;rv:2.0.1)Gecko/20100101Firefox/4.0.1",
|
|
"Mozilla/5.0(WindowsNT6.1;rv:2.0.1)Gecko/20100101Firefox/4.0.1",
|
|
"Mozilla/5.0(WindowsNT6.1;rv:2.0.1)Gecko/20100101Firefox/4.0.1",
|
|
"Mozilla/5.0(Macintosh;IntelMacOSX10_7_0)AppleWebKit/535.11(KHTML,likeGecko)Chrome/17.0.963.56Safari/535.11",
|
|
"Mozilla/5.0(Macintosh;IntelMacOSX10_7_0)AppleWebKit/535.11(KHTML,likeGecko)Chrome/17.0.963.56Safari/535.11",
|
|
- "Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;Trident/4.0;SE2.XMetaSr1.0;SE2.XMetaSr1.0;.NETCLR2.0.50727;SE2.XMetaSr1.0)",
|
|
|
|
|
|
+ //"Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;Trident/4.0;SE2.XMetaSr1.0;SE2.XMetaSr1.0;.NETCLR2.0.50727;SE2.XMetaSr1.0)",
|
|
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.70 Safari/537.36",
|
|
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.70 Safari/537.36",
|
|
"Chrome 9 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
|
|
"Chrome 9 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
|
|
"Safari Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
|
|
"Safari Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
|
|
@@ -42,20 +44,20 @@ var (
|
|
"Safari Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
|
|
"Safari Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
|
|
"Chrome 8 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
|
|
"Chrome 8 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
|
|
"Chrome Mozilla/5.0 (X11; U; U; Linux x86_64; zh-my) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 Puffin/8.3.1.41624AP",
|
|
"Chrome Mozilla/5.0 (X11; U; U; Linux x86_64; zh-my) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 Puffin/8.3.1.41624AP",
|
|
- "Opera 28 Mozilla/5.0 (Linux; BRAVIA 4K 2015 Build/LMY48E.S265) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36 OPR/28.0.1754.0",
|
|
|
|
|
|
+ //"Opera 28 Mozilla/5.0 (Linux; BRAVIA 4K 2015 Build/LMY48E.S265) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36 OPR/28.0.1754.0",
|
|
"Safari Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36 HeyTapBrowser/40.7.29.1",
|
|
"Safari Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36 HeyTapBrowser/40.7.29.1",
|
|
"Chrome 9 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.58 Safari/537.36 Edg/93.0.961.33",
|
|
"Chrome 9 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.58 Safari/537.36 Edg/93.0.961.33",
|
|
"Chrome 9 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/15.0 Chrome/90.0.4430.210 Safari/537.36",
|
|
"Chrome 9 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/15.0 Chrome/90.0.4430.210 Safari/537.36",
|
|
"Chrome 9 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
|
|
"Chrome 9 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
|
|
"Chrome Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
|
|
"Chrome Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
|
|
"Microsoft Edge Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134",
|
|
"Microsoft Edge Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134",
|
|
- "Chrome 8 Mozilla/5.0 (Windows NT 10.0; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
|
|
|
|
- "Chrome 8 Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
|
|
|
|
|
|
+ //"Chrome 8 Mozilla/5.0 (Windows NT 10.0; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
|
|
|
|
+ //"Chrome 8 Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
|
|
"Chrome 9 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
|
|
"Chrome 9 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
|
|
- "Chrome 8 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36",
|
|
|
|
|
|
+ //"Chrome 8 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36",
|
|
"Chrome 9 Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
|
"Chrome 9 Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
|
"Chrome Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
|
|
"Chrome Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
|
|
- "Firefox 7 Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0",
|
|
|
|
|
|
+ //"Firefox 7 Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0",
|
|
"Chrome 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
|
|
"Chrome 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
|
|
"Internet Explorer 11 Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; LCJB; rv:11.0) like Gecko",
|
|
"Internet Explorer 11 Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; LCJB; rv:11.0) like Gecko",
|
|
"Chrome 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36",
|
|
"Chrome 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36",
|
|
@@ -63,7 +65,7 @@ var (
|
|
"Chrome Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3870.400 QQBrowser/10.8.4405.400",
|
|
"Chrome Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3870.400 QQBrowser/10.8.4405.400",
|
|
"Chrome 58 Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0",
|
|
"Chrome 58 Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0",
|
|
"Firefox 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0",
|
|
"Firefox 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0",
|
|
- "Chrome 8 Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
|
|
|
|
|
|
+ //"Chrome 8 Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
|
|
"Chrome 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Edg/94.0.992.38",
|
|
"Chrome 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Edg/94.0.992.38",
|
|
}
|
|
}
|
|
)
|
|
)
|
|
@@ -126,26 +128,24 @@ func NewBrowser(headless bool, showImage bool, proxyServe bool, baseUrl string)
|
|
// 创建一个浏览器实例
|
|
// 创建一个浏览器实例
|
|
incCtx, incCancelFn := chromedp.NewContext(allocCtx,
|
|
incCtx, incCancelFn := chromedp.NewContext(allocCtx,
|
|
chromedp.WithLogf(nil))
|
|
chromedp.WithLogf(nil))
|
|
- //TODO 设置浏览器网络加载超时
|
|
|
|
- // 监听网络请求并设置特定资源的超时
|
|
|
|
- chromedp.ListenTarget(ctx, func(ev interface{}) {
|
|
|
|
- switch ev := ev.(type) {
|
|
|
|
- case *network.EventRequestWillBeSent:
|
|
|
|
- // 设置超时时间
|
|
|
|
- timeout := time.Duration(Cfg.BrowserLoadResourceTimeout) * time.Second
|
|
|
|
- // TODO 这里要检查哪些资源进行超时中断监测,默认仅资源类请求
|
|
|
|
- // 配置太麻烦,先全局,所有请求类型,这个chromedp的超时上下文不一个概念
|
|
|
|
- // 设置一个定时器,当超时后取消请求
|
|
|
|
|
|
+ trie := NewTrie()
|
|
|
|
+ //TODO 这里默认构建通用的资源加载排除,最好是单个网站可以定制,
|
|
|
|
+ // 对于纯后端渲染网站,可以屏蔽所有资源加载,达到平台最高性能目的
|
|
|
|
+ trie.BatchInsert(Cfg.DisableLoadResource)
|
|
|
|
+ chromedp.ListenTarget(ctx, func(event interface{}) {
|
|
|
|
+ switch ev := event.(type) {
|
|
|
|
+ case *fetch.EventRequestPaused:
|
|
go func() {
|
|
go func() {
|
|
- select {
|
|
|
|
- case <-time.After(timeout):
|
|
|
|
- fid := fetch.RequestID(ev.RequestID)
|
|
|
|
- fetch.FailRequest(fid, network.ErrorReasonTimedOut).Do(ctx)
|
|
|
|
|
|
+ c := chromedp.FromContext(ctx)
|
|
|
|
+ ctx := cdp.WithExecutor(ctx, c.Target)
|
|
|
|
+ if trie.HasKeyword(ev.Request.URL) {
|
|
|
|
+ fetch.FailRequest(ev.RequestID, network.ErrorReasonBlockedByClient).Do(ctx)
|
|
|
|
+ } else {
|
|
|
|
+ fetch.ContinueRequest(ev.RequestID).Do(ctx)
|
|
}
|
|
}
|
|
}()
|
|
}()
|
|
}
|
|
}
|
|
})
|
|
})
|
|
-
|
|
|
|
//
|
|
//
|
|
chromedp.Run(ctx,
|
|
chromedp.Run(ctx,
|
|
fetch.Enable(),
|
|
fetch.Enable(),
|