瀏覽代碼

downloadFile方法更新

maxiaoshan 2 年之前
父節點
當前提交
ffe3e2332a
共有 2 個文件被更改,包括 34 次插入和 31 次删除
  1. 1 1
      src/spider/download.go
  2. 33 30
      src/spider/script.go

+ 1 - 1
src/spider/download.go

@@ -198,7 +198,7 @@ func DownloadFile_bak(downloaderid, url, method string, reqparam, head map[strin
 	}
 }
 
-func DownloadFile(retLen *int64, downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {
+func DownloadFile(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {
 	defer mu.Catch()
 	timeout = timeout * 2
 	msgid := mu.UUID(8)

+ 33 - 30
src/spider/script.go

@@ -214,8 +214,16 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 		base64UrlReg := regexp.MustCompile("data:image")
 		indexArr := base64UrlReg.FindStringIndex(url)
 		name, size, ftype, fid := "", "", "", ""
+		tmpUrl := ""
 		var ret []byte
 		var err error
+		var mycookie []*http.Cookie
+		if cookie != "{}" {
+			json.Unmarshal([]byte(cookie), &mycookie)
+		} else {
+			mycookie = make([]*http.Cookie, 0)
+		}
+
 		//base64 url
 		if len(indexArr) == 2 { //base64 http://www.mmjyjt.com/data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAqAAAAOwCAYAAAD
 			//截取base64
@@ -229,38 +237,10 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 				url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, "", ret)
 			}
 		} else {
-			var mycookie []*http.Cookie
-			if cookie != "{}" {
-				json.Unmarshal([]byte(cookie), &mycookie)
-			} else {
-				mycookie = make([]*http.Cookie, 0)
-			}
 			fileName = strings.TrimSpace(fileName)
 			url = strings.TrimSpace(url)
-			var retLen int64
-			ret = DownloadFile(&retLen, s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
-			//流量统计
-			//if retLen > 0 {
-			//	key := Today + "+" + code
-			//	if sf, ok := SpiderFlowMap.Load(key); ok && sf != nil {
-			//		if sfMap, ok := sf.(*SpiderFlow); ok {
-			//			sfMap.Flow += retLen
-			//			//sfMap.Site = *site
-			//			//sfMap.Channel = *channel
-			//			//sfMap.ModifyUser = *user
-			//			SpiderFlowMap.Store(key, sfMap)
-			//		}
-			//	} else {
-			//		SpiderFlowMap.Store(key, &SpiderFlow{
-			//			//Code:       code,
-			//			Site:       *site,
-			//			Channel:    *channel,
-			//			Flow:       retLen,
-			//			ModifyUser: *user,
-			//		})
-			//	}
-			//}
-
+			tmpUrl = url
+			ret = DownloadFile(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
 			url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, url, ret)
 			if strings.TrimSpace(ftype) == "" {
 				if len(path.Ext(name)) > 0 {
@@ -276,6 +256,29 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 				logger.Info("Error File Type:", bttype, url)
 				size, ftype, fid = "", "", ""
 			}
+		} else if *site == "中国政府采购网" && tmpUrl != "" { //中国政府采购网附件大小异常,限制IP所致
+			if size == "4.1 KB" || size == "4.2 KB" {
+				times := 1
+				for { //重试三次
+					if times > 3 {
+						break
+					}
+					//http://www.ccgp.gov.cn/cggg/dfgg/jzxcs/202302/t20230210_19437644.htm
+					ret = DownloadFile(s.Downloader, tmpUrl, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
+					bs := bytes.NewReader(ret)
+					bsLen := qu.ConvertFileSize(bs.Len())
+					if bsLen != "4.1 KB" && bsLen != "4.2 KB" && bsLen != "0 B" {
+						url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, tmpUrl, ret)
+						break
+					}
+					times++
+				}
+				if size == "4.1 KB" || size == "4.2 KB" { //重试后异常
+					fid = ""
+					ftype = ""
+					name = ""
+				}
+			}
 		}
 		S.Push(lua.LString(url))
 		S.Push(lua.LString(name))