|
@@ -10,6 +10,7 @@ import (
|
|
qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
"math/rand"
|
|
"math/rand"
|
|
"net/http"
|
|
"net/http"
|
|
|
|
+ "net/url"
|
|
"os"
|
|
"os"
|
|
"regexp"
|
|
"regexp"
|
|
be "spider_creator/backend"
|
|
be "spider_creator/backend"
|
|
@@ -82,7 +83,14 @@ func downloadAttaches(v *be.ResultItem, attachesDir string) {
|
|
qu.Debug(" 下载附件 构建req 出错:", attach.Href, attach.FileName, err.Error())
|
|
qu.Debug(" 下载附件 构建req 出错:", attach.Href, attach.FileName, err.Error())
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
|
|
+ // Build the request headers; the host is taken from the parsed attachment URL.
|
|
|
|
+ var hostName string
|
|
|
|
+ if parsedURL, err := url.Parse(attach.Href); err == nil {
|
|
|
|
+ hostName = parsedURL.Host
|
|
|
|
+ }
|
|
req.Header.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36")
|
|
req.Header.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36")
|
|
|
|
+ req.Header.Add("host", hostName)
|
|
|
|
+ req.Header.Add("href", v.Href)
|
|
resp, err := client.Do(req)
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
if err != nil {
|
|
qu.Debug(" 下载附件 发送请求 出错:", attach.Href, attach.FileName, err.Error())
|
|
qu.Debug(" 下载附件 发送请求 出错:", attach.Href, attach.FileName, err.Error())
|