|
@@ -7,9 +7,9 @@ import (
|
|
|
"errors"
|
|
|
"fmt"
|
|
|
"io/ioutil"
|
|
|
- qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
|
"math/rand"
|
|
|
"net/http"
|
|
|
+ "net/url"
|
|
|
"os"
|
|
|
"regexp"
|
|
|
be "spider_creator/backend"
|
|
@@ -17,6 +17,8 @@ import (
|
|
|
"text/template"
|
|
|
"time"
|
|
|
|
|
|
+ qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
|
+
|
|
|
"github.com/chromedp/chromedp"
|
|
|
|
|
|
"github.com/gabriel-vasile/mimetype"
|
|
@@ -82,7 +84,14 @@ func downloadAttaches(v *be.ResultItem, attachesDir string) {
|
|
|
qu.Debug(" 下载附件 构建req 出错:", attach.Href, attach.FileName, err.Error())
|
|
|
continue
|
|
|
}
|
|
|
+ //构造请求头
|
|
|
+ var hostName string
|
|
|
+ if parsedURL, err := url.Parse(attach.Href); err == nil {
|
|
|
+ hostName = parsedURL.Host
|
|
|
+ }
|
|
|
req.Header.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36")
|
|
|
+ req.Header.Add("host", hostName)
|
|
|
+ req.Header.Add("referer", v.Href)
|
|
|
resp, err := client.Do(req)
|
|
|
if err != nil {
|
|
|
qu.Debug(" 下载附件 发送请求 出错:", attach.Href, attach.FileName, err.Error())
|