|
@@ -29,7 +29,9 @@ const (
|
|
)
|
|
)
|
|
|
|
|
|
var (
|
|
var (
|
|
- Reg_Date = regexp.MustCompile(`\d`)
|
|
|
|
|
|
+ Reg_Date = regexp.MustCompile(`\d`)
|
|
|
|
+ Reg_File_ContentType = regexp.MustCompile(`(?i)^(application/(vnd\.(openxmlformats-officedocument|ms-excel)|msword|pdf)|image/(png|jpeg))`)
|
|
|
|
+ Reg_File_Type = regexp.MustCompile(`(?i)\.(pdf|doc|docx|xls|xlsx|ppt|pptx|jpg|jpeg|png|gif|bmp|zip|rar|7z|gz|csv|swf)$`)
|
|
)
|
|
)
|
|
|
|
|
|
type (
|
|
type (
|
|
@@ -78,7 +80,23 @@ func downloadAttaches(v *be.ResultItem, attachesDir string) {
|
|
}
|
|
}
|
|
for _, attach := range v.AttachLinks {
|
|
for _, attach := range v.AttachLinks {
|
|
qu.Debug("准备下载附件,", attach.Href, attach.Title)
|
|
qu.Debug("准备下载附件,", attach.Href, attach.Title)
|
|
- req, err := http.NewRequest("GET", attach.Href, nil)
|
|
|
|
|
|
+ //if !Reg_File_Type.MatchString(attach.Title) {
|
|
|
|
+ req, err := http.NewRequest("HEAD", attach.Href, nil)
|
|
|
|
+ if err != nil {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ resp, err := client.Do(req)
|
|
|
|
+ if err != nil || resp.StatusCode != http.StatusOK {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ ft := resp.Header.Get("Content-Type")
|
|
|
|
+ fl := resp.Header.Get("Content-Length")
|
|
|
|
+ qu.Debug("------------", ft, qu.IntAll(fl), qu.IntAll(fl)/1024)
|
|
|
|
+ if !Reg_File_ContentType.MatchString(ft) || qu.IntAll(fl) < 1024*5 {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ //}
|
|
|
|
+ req, err = http.NewRequest("GET", attach.Href, nil)
|
|
if err != nil {
|
|
if err != nil {
|
|
qu.Debug(" 下载附件 构建req 出错:", attach.Href, attach.FileName, err.Error())
|
|
qu.Debug(" 下载附件 构建req 出错:", attach.Href, attach.FileName, err.Error())
|
|
continue
|
|
continue
|
|
@@ -91,7 +109,7 @@ func downloadAttaches(v *be.ResultItem, attachesDir string) {
|
|
req.Header.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36")
|
|
req.Header.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36")
|
|
req.Header.Add("host", hostName)
|
|
req.Header.Add("host", hostName)
|
|
req.Header.Add("referer", v.Href)
|
|
req.Header.Add("referer", v.Href)
|
|
- resp, err := client.Do(req)
|
|
|
|
|
|
+ resp, err = client.Do(req)
|
|
if err != nil {
|
|
if err != nil {
|
|
qu.Debug(" 下载附件 发送请求 出错:", attach.Href, attach.FileName, err.Error())
|
|
qu.Debug(" 下载附件 发送请求 出错:", attach.Href, attach.FileName, err.Error())
|
|
continue
|
|
continue
|