2
0

2 Коммитууд e287385655 ... 526e3c2f45

Эзэн SHA1 Мессеж Огноо
  小阿七 526e3c2f45 Merge branch 'dev1.1' of https://jygit.jydev.jianyu360.cn/data_processing/spider_creater into dev1.1 6 сар өмнө
  小阿七 fc0ba11ab7 下载逻辑优化 6 сар өмнө
3 файл өөрчлөгдсөн, 12 нэмэгдсэн, 1 устгасан
  1. 1 0
      .gitignore
  2. 10 1
      backend/vm/vm.go
  3. 1 0
      go.mod

+ 1 - 0
.gitignore

@@ -10,4 +10,5 @@ bin
 .db
 .db
 go.sum
 go.sum
 spider.dat
 spider.dat
+go.mod
 spider_attaches/
 spider_attaches/

+ 10 - 1
backend/vm/vm.go

@@ -7,9 +7,9 @@ import (
 	"errors"
 	"errors"
 	"fmt"
 	"fmt"
 	"io/ioutil"
 	"io/ioutil"
-	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"math/rand"
 	"math/rand"
 	"net/http"
 	"net/http"
+	"net/url"
 	"os"
 	"os"
 	"regexp"
 	"regexp"
 	be "spider_creator/backend"
 	be "spider_creator/backend"
@@ -17,6 +17,8 @@ import (
 	"text/template"
 	"text/template"
 	"time"
 	"time"
 
 
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+
 	"github.com/chromedp/chromedp"
 	"github.com/chromedp/chromedp"
 
 
 	"github.com/gabriel-vasile/mimetype"
 	"github.com/gabriel-vasile/mimetype"
@@ -82,7 +84,14 @@ func downloadAttaches(v *be.ResultItem, attachesDir string) {
 			qu.Debug(" 下载附件 构建req 出错:", attach.Href, attach.FileName, err.Error())
 			qu.Debug(" 下载附件 构建req 出错:", attach.Href, attach.FileName, err.Error())
 			continue
 			continue
 		}
 		}
+		//构造请求头
+		var hostName string
+		if parsedURL, err := url.Parse(attach.Href); err == nil {
+			hostName = parsedURL.Host
+		}
 		req.Header.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36")
 		req.Header.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36")
+		req.Header.Add("host", hostName)
+		req.Header.Add("referer", v.Href)
 		resp, err := client.Do(req)
 		resp, err := client.Do(req)
 		if err != nil {
 		if err != nil {
 			qu.Debug(" 下载附件 发送请求 出错:", attach.Href, attach.FileName, err.Error())
 			qu.Debug(" 下载附件 发送请求 出错:", attach.Href, attach.FileName, err.Error())

+ 1 - 0
go.mod

@@ -83,3 +83,4 @@ require (
 )
 )
 
 
 // replace github.com/wailsapp/wails/v2 v2.9.1 => /Users/taozhang/go/pkg/mod
 // replace github.com/wailsapp/wails/v2 v2.9.1 => /Users/taozhang/go/pkg/mod
+replace jygit.jydev.jianyu360.cn/data_processing/common_utils v0.0.0-20240202055658-e2ef72e18b40 => ../common_utils