Przeglądaj źródła

网络请求忽略缺陷修复

小阿七 7 miesięcy temu
rodzic
commit
9910ec68ce
4 zmienionych plików z 41 dodań i 40 usunięć
  1. 6 6
      backend/browser.go
  2. 1 0
      backend/config.yaml
  3. 23 7
      go.mod
  4. 11 27
      main.go

+ 6 - 6
backend/browser.go

@@ -132,22 +132,22 @@ func NewBrowser(headless bool, showImage bool, proxyServe bool, baseUrl string)
 	//TODO 这里默认构建通用的资源加载排除,最好是单个网站可以定制,
 	// 对于纯后端渲染网站,可以屏蔽所有资源加载,达到平台最高性能目的
 	trie.BatchInsert(Cfg.DisableLoadResource)
-	chromedp.ListenTarget(ctx, func(event interface{}) {
+	chromedp.ListenTarget(incCtx, func(event interface{}) {
 		switch ev := event.(type) {
 		case *fetch.EventRequestPaused:
 			go func() {
-				c := chromedp.FromContext(ctx)
-				ctx := cdp.WithExecutor(ctx, c.Target)
+				c := chromedp.FromContext(incCtx)
+				_ctx := cdp.WithExecutor(incCtx, c.Target)
 				if trie.HasKeyword(ev.Request.URL) {
-					fetch.FailRequest(ev.RequestID, network.ErrorReasonBlockedByClient).Do(ctx)
+					fetch.FailRequest(ev.RequestID, network.ErrorReasonBlockedByClient).Do(_ctx)
 				} else {
-					fetch.ContinueRequest(ev.RequestID).Do(ctx)
+					fetch.ContinueRequest(ev.RequestID).Do(_ctx)
 				}
 			}()
 		}
 	})
 	//
-	chromedp.Run(ctx,
+	chromedp.Run(incCtx,
 		fetch.Enable(),
 		chromedp.ActionFunc(func(cxt context.Context) error {
 			_, err := page.AddScriptToEvaluateOnNewDocument("Object.defineProperty(navigator, 'webdriver', { get: () => false, });").Do(cxt)

+ 1 - 0
backend/config.yaml

@@ -9,6 +9,7 @@
 ## 默认浏览器检查
 #default-browser-check: false
 browserLoadResourceTimeout: 5
+disableLoadResource : "ws://;wss://;.tof;.woff;.ico;.mp4;.zip;.rar;.exe;"
 #验证码解析
 timeout: 15
 address: "http://pycaptcha.spdata.jianyu360.com/v1/images/discern?pic_type="

+ 23 - 7
go.mod

@@ -1,8 +1,8 @@
 module spider_creator
 
-go 1.21.5
+go 1.22.0
 
-toolchain go1.22.2
+toolchain go1.22.4
 
 require (
 	github.com/bmaupin/go-epub v1.1.0
@@ -10,6 +10,7 @@ require (
 	github.com/chromedp/cdproto v0.0.0-20240810084448-b931b754e476
 	github.com/chromedp/chromedp v0.10.0
 	github.com/gabriel-vasile/mimetype v1.3.1
+	github.com/imroc/req/v3 v3.49.1
 	github.com/itcwc/go-zhipu v0.0.0-20240626065325-ffc8bf1cfaaa
 	github.com/wailsapp/wails/v2 v2.9.1
 	github.com/xuri/excelize/v2 v2.8.1
@@ -20,20 +21,27 @@ require (
 
 require (
 	github.com/PuerkitoBio/goquery v1.8.0 // indirect
+	github.com/andybalholm/brotli v1.1.1 // indirect
 	github.com/andybalholm/cascadia v1.3.1 // indirect
 	github.com/bep/debounce v1.2.1 // indirect
 	github.com/chromedp/sysutil v1.0.0 // indirect
+	github.com/cloudflare/circl v1.5.0 // indirect
 	github.com/dchest/captcha v1.0.0 // indirect
 	github.com/go-ole/go-ole v1.2.6 // indirect
+	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
 	github.com/gobwas/httphead v0.1.0 // indirect
 	github.com/gobwas/pool v0.2.1 // indirect
 	github.com/gobwas/ws v1.4.0 // indirect
 	github.com/godbus/dbus/v5 v5.1.0 // indirect
 	github.com/gofrs/uuid v3.1.0+incompatible // indirect
 	github.com/golang-jwt/jwt v3.2.2+incompatible // indirect
+	github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect
 	github.com/google/uuid v1.3.0 // indirect
+	github.com/hashicorp/errwrap v1.1.0 // indirect
+	github.com/hashicorp/go-multierror v1.1.1 // indirect
 	github.com/jchv/go-winloader v0.0.0-20210711035445-715c2860da7e // indirect
 	github.com/josharian/intern v1.0.0 // indirect
+	github.com/klauspost/compress v1.17.11 // indirect
 	github.com/labstack/echo/v4 v4.10.2 // indirect
 	github.com/labstack/gommon v0.4.0 // indirect
 	github.com/leaanthony/go-ansi-parser v1.6.0 // indirect
@@ -44,8 +52,12 @@ require (
 	github.com/mattn/go-colorable v0.1.13 // indirect
 	github.com/mattn/go-isatty v0.0.19 // indirect
 	github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect
+	github.com/onsi/ginkgo/v2 v2.22.0 // indirect
 	github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
+	github.com/quic-go/qpack v0.5.1 // indirect
+	github.com/quic-go/quic-go v0.48.2 // indirect
+	github.com/refraction-networking/utls v1.6.7 // indirect
 	github.com/richardlehane/mscfb v1.0.4 // indirect
 	github.com/richardlehane/msoleps v1.0.3 // indirect
 	github.com/rivo/uniseg v0.4.4 // indirect
@@ -58,11 +70,15 @@ require (
 	github.com/wailsapp/mimetype v1.4.1 // indirect
 	github.com/xuri/efp v0.0.0-20231025114914-d1ff6096ae53 // indirect
 	github.com/xuri/nfp v0.0.0-20230919160717-d98342af3f05 // indirect
-	golang.org/x/crypto v0.23.0 // indirect
-	golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 // indirect
-	golang.org/x/net v0.25.0 // indirect
-	golang.org/x/sys v0.22.0 // indirect
-	golang.org/x/text v0.15.0 // indirect
+	go.uber.org/mock v0.5.0 // indirect
+	golang.org/x/crypto v0.31.0 // indirect
+	golang.org/x/exp v0.0.0-20241215155358-4a5509556b9e // indirect
+	golang.org/x/mod v0.22.0 // indirect
+	golang.org/x/net v0.33.0 // indirect
+	golang.org/x/sync v0.10.0 // indirect
+	golang.org/x/sys v0.28.0 // indirect
+	golang.org/x/text v0.21.0 // indirect
+	golang.org/x/tools v0.28.0 // indirect
 	gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 // indirect
 )
 

+ 11 - 27
main.go

@@ -3,27 +3,17 @@ package main
 import (
 	"container/list"
 	"embed"
-<<<<<<< HEAD
-=======
-	"github.com/wailsapp/wails/v2"
-	"github.com/wailsapp/wails/v2/pkg/options"
-	"github.com/wailsapp/wails/v2/pkg/options/assetserver"
-	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
->>>>>>> 3146e627030e181299474dd54a941ed33c8183bb
+
 	be "spider_creator/backend"
 	bdb "spider_creator/backend/db"
 	"spider_creator/backend/script"
 	bvm "spider_creator/backend/vm"
 	bws "spider_creator/backend/webservice"
-<<<<<<< HEAD
-
-	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 
 	"github.com/wailsapp/wails/v2"
 	"github.com/wailsapp/wails/v2/pkg/options"
 	"github.com/wailsapp/wails/v2/pkg/options/assetserver"
-=======
->>>>>>> 3146e627030e181299474dd54a941ed33c8183bb
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 )
 
 var (
@@ -38,15 +28,12 @@ var (
 	vm                   *bvm.VM
 	glvm                 *script.GLVm
 	ws                   *bws.WebService
-<<<<<<< HEAD
+
 	//重点网站和正式环境
 	isOnly4MainSite             string = "false"
-	browserDisableLoadResources        = "ws://;wss://;.tof;.woff;.ico;.mp4;.zip;.rar;.exe;"
+	browserDisableLoadResources        = ""
 	serverAddress                      = "http://visualizeld.spdata.jianyu360.com/%s" //正式环境
-=======
-	isOnly4MainSite      = "false"
-	serverAddress        = "http://visualizeld.spdata.jianyu360.com/%s" //正式环境
->>>>>>> 3146e627030e181299474dd54a941ed33c8183bb
+
 	//serverAddress = "http://127.0.0.1:8091/%s" // local debugging address (loopback), not production
 )
 
@@ -54,20 +41,17 @@ var (
 // wails build -ldflags="-X 'main.isOnly4MainSite=false'" -o="剑鱼可视化爬虫开发工具_正式.exe"
 
 func init() {
-<<<<<<< HEAD
-	//be.LoadConfig("./config.yaml")
-	be.Cfg.DisableLoadResource = browserDisableLoadResources
-=======
-	be.LoadConfig("backend/config.yaml")
->>>>>>> 3146e627030e181299474dd54a941ed33c8183bb
+	be.LoadConfig("./config.yaml")
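+	// Build-time fallback for the browser's blocked-resource list: it has lower priority than the config file and is applied only when the config leaves DisableLoadResource empty.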
+	if be.Cfg.DisableLoadResource == "" && browserDisableLoadResources != "" {
+		be.Cfg.DisableLoadResource = browserDisableLoadResources
+	}
+
 	be.Cfg.IsOnly4MainSite = isOnly4MainSite == "true"
 	if be.Cfg.IsOnly4MainSite {
 		serverAddress = "http://visualize.spdata.jianyu360.com/%s" //重点网站
 	}
-<<<<<<< HEAD
 
-=======
->>>>>>> 3146e627030e181299474dd54a941ed33c8183bb
 	qu.Debug("重点网站:", be.Cfg.IsOnly4MainSite, serverAddress)
 }