|
@@ -227,6 +227,82 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
|
|
return 3
|
|
return 3
|
|
}))
|
|
}))
|
|
//下载附件downloadFile(url,method,param,head,cookie,fileName)
|
|
//下载附件downloadFile(url,method,param,head,cookie,fileName)
|
|
|
|
+ //s.L.SetGlobal("downloadFile", s.L.NewFunction(func(S *lua.LState) int {
|
|
|
|
+ // if s.FileLastThreeTimes == nil {
|
|
|
|
+ // s.FileLastThreeTimes = make([]time.Duration, 4)
|
|
|
|
+ // }
|
|
|
|
+ // if util.Config.IsDelay {
|
|
|
|
+ // SleepTime(3, s.FileLastThreeTimes) //睡眠时间
|
|
|
|
+ // }
|
|
|
|
+ // start := time.Now() //起始时间
|
|
|
|
+ // cookie := S.ToString(-1)
|
|
|
|
+ // head := S.ToTable(-2)
|
|
|
|
+ // param := S.ToTable(-3)
|
|
|
|
+ // method := S.ToString(-4)
|
|
|
|
+ // url := S.ToString(-5)
|
|
|
|
+ // fileName := S.ToString(-6)
|
|
|
|
+ // ishttps := strings.Contains(url, "https")
|
|
|
|
+ // //base64匹配
|
|
|
|
+ // base64UrlReg := regexp.MustCompile("data:image")
|
|
|
|
+ // indexArr := base64UrlReg.FindStringIndex(url)
|
|
|
|
+ // name, size, ftype, fid := "", "", "", ""
|
|
|
|
+ // var ret []byte
|
|
|
|
+ // var err error
|
|
|
|
+ // //base64 url
|
|
|
|
+ // if len(indexArr) == 2 { //base64 http://www.mmjyjt.com/
|
|
|
|
+ // //截取base64
|
|
|
|
+ // start := indexArr[0]
|
|
|
|
+ // url = url[start:]
|
|
|
|
+ // fileName = "文件下载.jpg"
|
|
|
|
+ // index := strings.Index(url, ",")
|
|
|
|
+ // dec := base64.NewDecoder(base64.StdEncoding, strings.NewReader(url[index+1:]))
|
|
|
|
+ // ret, err = io.ReadAll(dec)
|
|
|
|
+ // if err == nil && len(ret) > 0 {
|
|
|
|
+ // url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, "", ret)
|
|
|
|
+ // }
|
|
|
|
+ // } else {
|
|
|
|
+ // var mycookie []*http.Cookie
|
|
|
|
+ // if cookie != "{}" {
|
|
|
|
+ // json.Unmarshal([]byte(cookie), &mycookie)
|
|
|
|
+ // } else {
|
|
|
|
+ // mycookie = make([]*http.Cookie, 0)
|
|
|
|
+ // }
|
|
|
|
+ // fileName = strings.TrimSpace(fileName)
|
|
|
|
+ // url = strings.TrimSpace(url)
|
|
|
|
+ // ret = DownloadFile(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
|
|
|
|
+ // url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, url, ret)
|
|
|
|
+ // if strings.TrimSpace(ftype) == "" {
|
|
|
|
+ // if len(path.Ext(name)) > 0 {
|
|
|
|
+ // ftype = path.Ext(name)[1:]
|
|
|
|
+ // }
|
|
|
|
+ // }
|
|
|
|
+ // }
|
|
|
|
+ // //特殊处理中国招标投标公共服务平台异常附件过滤
|
|
|
|
+ // if *site == "中国招标投标公共服务平台" {
|
|
|
|
+ // if fid != "" && strings.Contains(fid, ErrFid) { //限制访问的附件
|
|
|
|
+ // size, ftype, fid = "", "", "" //信息置空,AnalysisProjectInfo方法将判断数据下载失败重新下载
|
|
|
|
+ // } else if bttype := qu.GetFileType(ret); bttype != "pdf" { //由字节流解析的附件类型不是pdf
|
|
|
|
+ // logger.Info("Error File Type:", bttype, url)
|
|
|
|
+ // size, ftype, fid = "", "", ""
|
|
|
|
+ // }
|
|
|
|
+ // }
|
|
|
|
+ // S.Push(lua.LString(url))
|
|
|
|
+ // S.Push(lua.LString(name))
|
|
|
|
+ // S.Push(lua.LString(size))
|
|
|
|
+ // S.Push(lua.LString(ftype))
|
|
|
|
+ // S.Push(lua.LString(fid))
|
|
|
|
+ // atomic.AddInt32(&s.ToDayRequestNum, 1)
|
|
|
|
+ // atomic.AddInt32(&s.TotalRequestNum, 1)
|
|
|
|
+ //
|
|
|
|
+ // end := time.Since(start)
|
|
|
|
+ // if len(s.FileLastThreeTimes) >= 4 {
|
|
|
|
+ // s.FileLastThreeTimes = s.FileLastThreeTimes[1:]
|
|
|
|
+ // }
|
|
|
|
+ // s.FileLastThreeTimes = append(s.FileLastThreeTimes, end)
|
|
|
|
+ // return 5
|
|
|
|
+ //}))
|
|
|
|
+
|
|
|
|
+ //附件大小限制3KB时,解决中国政府采购网附件采集问题
|
|
s.L.SetGlobal("downloadFile", s.L.NewFunction(func(S *lua.LState) int {
|
|
s.L.SetGlobal("downloadFile", s.L.NewFunction(func(S *lua.LState) int {
|
|
if s.FileLastThreeTimes == nil {
|
|
if s.FileLastThreeTimes == nil {
|
|
s.FileLastThreeTimes = make([]time.Duration, 4)
|
|
s.FileLastThreeTimes = make([]time.Duration, 4)
|
|
@@ -246,8 +322,16 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
|
|
base64UrlReg := regexp.MustCompile("data:image")
|
|
base64UrlReg := regexp.MustCompile("data:image")
|
|
indexArr := base64UrlReg.FindStringIndex(url)
|
|
indexArr := base64UrlReg.FindStringIndex(url)
|
|
name, size, ftype, fid := "", "", "", ""
|
|
name, size, ftype, fid := "", "", "", ""
|
|
|
|
+ tmpUrl := ""
|
|
var ret []byte
|
|
var ret []byte
|
|
var err error
|
|
var err error
|
|
|
|
+ var mycookie []*http.Cookie
|
|
|
|
+ if cookie != "{}" {
|
|
|
|
+ json.Unmarshal([]byte(cookie), &mycookie)
|
|
|
|
+ } else {
|
|
|
|
+ mycookie = make([]*http.Cookie, 0)
|
|
|
|
+ }
|
|
|
|
+
|
|
//base64 url
|
|
//base64 url
|
|
if len(indexArr) == 2 { //base64 http://www.mmjyjt.com/
|
|
if len(indexArr) == 2 { //base64 http://www.mmjyjt.com/
|
|
//截取base64
|
|
//截取base64
|
|
@@ -261,14 +345,9 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
|
|
url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, "", ret)
|
|
url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, "", ret)
|
|
}
|
|
}
|
|
} else {
|
|
} else {
|
|
- var mycookie []*http.Cookie
|
|
|
|
- if cookie != "{}" {
|
|
|
|
- json.Unmarshal([]byte(cookie), &mycookie)
|
|
|
|
- } else {
|
|
|
|
- mycookie = make([]*http.Cookie, 0)
|
|
|
|
- }
|
|
|
|
fileName = strings.TrimSpace(fileName)
|
|
fileName = strings.TrimSpace(fileName)
|
|
url = strings.TrimSpace(url)
|
|
url = strings.TrimSpace(url)
|
|
|
|
+ tmpUrl = url
|
|
ret = DownloadFile(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
|
|
ret = DownloadFile(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
|
|
url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, url, ret)
|
|
url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, url, ret)
|
|
if strings.TrimSpace(ftype) == "" {
|
|
if strings.TrimSpace(ftype) == "" {
|
|
@@ -285,6 +364,29 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
|
|
logger.Info("Error File Type:", bttype, url)
|
|
logger.Info("Error File Type:", bttype, url)
|
|
size, ftype, fid = "", "", ""
|
|
size, ftype, fid = "", "", ""
|
|
}
|
|
}
|
|
|
|
+ } else if *site == "中国政府采购网" && tmpUrl != "" { //中国政府采购网附件大小异常,限制IP所致
|
|
|
|
+ if size == "4.1 KB" || size == "4.2 KB" {
|
|
|
|
+ times := 1
|
|
|
|
+ for { //重试三次
|
|
|
|
+ if times > 3 {
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ //http://www.ccgp.gov.cn/cggg/dfgg/jzxcs/202302/t20230210_19437644.htm
|
|
|
|
+ ret = DownloadFile(s.Downloader, tmpUrl, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
|
|
|
|
+ bs := bytes.NewReader(ret)
|
|
|
|
+ bsLen := qu.ConvertFileSize(bs.Len())
|
|
|
|
+ if bsLen != "4.1 KB" && bsLen != "4.2 KB" && bsLen != "0 B" {
|
|
|
|
+ url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, tmpUrl, ret)
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ times++
|
|
|
|
+ }
|
|
|
|
+ if size == "4.1 KB" || size == "4.2 KB" { //重试后异常
|
|
|
|
+ fid = ""
|
|
|
|
+ ftype = ""
|
|
|
|
+ name = ""
|
|
|
|
+ }
|
|
|
|
+ }
|
|
}
|
|
}
|
|
S.Push(lua.LString(url))
|
|
S.Push(lua.LString(url))
|
|
S.Push(lua.LString(name))
|
|
S.Push(lua.LString(name))
|
|
@@ -301,109 +403,7 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
|
|
s.FileLastThreeTimes = append(s.FileLastThreeTimes, end)
|
|
s.FileLastThreeTimes = append(s.FileLastThreeTimes, end)
|
|
return 5
|
|
return 5
|
|
}))
|
|
}))
|
|
- /*
|
|
|
|
- //附件大小限制3KB时,解决中国政府采购网附件采集问题
|
|
|
|
- s.L.SetGlobal("downloadFile", s.L.NewFunction(func(S *lua.LState) int {
|
|
|
|
- if s.FileLastThreeTimes == nil {
|
|
|
|
- s.FileLastThreeTimes = make([]time.Duration, 4)
|
|
|
|
- }
|
|
|
|
- if util.Config.IsDelay {
|
|
|
|
- SleepTime(3, s.FileLastThreeTimes) //睡眠时间
|
|
|
|
- }
|
|
|
|
- start := time.Now() //起始时间
|
|
|
|
- cookie := S.ToString(-1)
|
|
|
|
- head := S.ToTable(-2)
|
|
|
|
- param := S.ToTable(-3)
|
|
|
|
- method := S.ToString(-4)
|
|
|
|
- url := S.ToString(-5)
|
|
|
|
- fileName := S.ToString(-6)
|
|
|
|
- ishttps := strings.Contains(url, "https")
|
|
|
|
- //base64匹配
|
|
|
|
- base64UrlReg := regexp.MustCompile("data:image")
|
|
|
|
- indexArr := base64UrlReg.FindStringIndex(url)
|
|
|
|
- name, size, ftype, fid := "", "", "", ""
|
|
|
|
- tmpUrl := ""
|
|
|
|
- var ret []byte
|
|
|
|
- var err error
|
|
|
|
- var mycookie []*http.Cookie
|
|
|
|
- if cookie != "{}" {
|
|
|
|
- json.Unmarshal([]byte(cookie), &mycookie)
|
|
|
|
- } else {
|
|
|
|
- mycookie = make([]*http.Cookie, 0)
|
|
|
|
- }
|
|
|
|
|
|
|
|
- //base64 url
|
|
|
|
- if len(indexArr) == 2 { //base64 http://www.mmjyjt.com/
|
|
|
|
- //截取base64
|
|
|
|
- start := indexArr[0]
|
|
|
|
- url = url[start:]
|
|
|
|
- fileName = "文件下载.jpg"
|
|
|
|
- index := strings.Index(url, ",")
|
|
|
|
- dec := base64.NewDecoder(base64.StdEncoding, strings.NewReader(url[index+1:]))
|
|
|
|
- ret, err = io.ReadAll(dec)
|
|
|
|
- if err == nil && len(ret) > 0 {
|
|
|
|
- url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, "", ret)
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
- fileName = strings.TrimSpace(fileName)
|
|
|
|
- url = strings.TrimSpace(url)
|
|
|
|
- tmpUrl = url
|
|
|
|
- ret = DownloadFile(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
|
|
|
|
- url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, url, ret)
|
|
|
|
- if strings.TrimSpace(ftype) == "" {
|
|
|
|
- if len(path.Ext(name)) > 0 {
|
|
|
|
- ftype = path.Ext(name)[1:]
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- //特殊处理中国招标投标公共服务平台异常附件过滤
|
|
|
|
- if *site == "中国招标投标公共服务平台" {
|
|
|
|
- if fid != "" && strings.Contains(fid, ErrFid) { //限制访问的附件
|
|
|
|
- size, ftype, fid = "", "", "" //信息置空,AnalysisProjectInfo方法将判断数据下载失败重新下载
|
|
|
|
- } else if bttype := qu.GetFileType(ret); bttype != "pdf" { //由字节流解析的附件类型不是pdf
|
|
|
|
- logger.Info("Error File Type:", bttype, url)
|
|
|
|
- size, ftype, fid = "", "", ""
|
|
|
|
- }
|
|
|
|
- } else if *site == "中国政府采购网" && tmpUrl != "" { //中国政府采购网附件大小异常,限制IP所致
|
|
|
|
- if size == "4.1 KB" || size == "4.2 KB" {
|
|
|
|
- times := 1
|
|
|
|
- for { //重试三次
|
|
|
|
- if times > 3 {
|
|
|
|
- break
|
|
|
|
- }
|
|
|
|
- //http://www.ccgp.gov.cn/cggg/dfgg/jzxcs/202302/t20230210_19437644.htm
|
|
|
|
- ret = DownloadFile(s.Downloader, tmpUrl, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
|
|
|
|
- bs := bytes.NewReader(ret)
|
|
|
|
- bsLen := qu.ConvertFileSize(bs.Len())
|
|
|
|
- if bsLen != "4.1 KB" && bsLen != "4.2 KB" && bsLen != "0 B" {
|
|
|
|
- url, name, size, ftype, fid = util.UploadFile(s.SCode, fileName, tmpUrl, ret)
|
|
|
|
- break
|
|
|
|
- }
|
|
|
|
- times++
|
|
|
|
- }
|
|
|
|
- if size == "4.1 KB" || size == "4.2 KB" { //重试后异常
|
|
|
|
- fid = ""
|
|
|
|
- ftype = ""
|
|
|
|
- name = ""
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- S.Push(lua.LString(url))
|
|
|
|
- S.Push(lua.LString(name))
|
|
|
|
- S.Push(lua.LString(size))
|
|
|
|
- S.Push(lua.LString(ftype))
|
|
|
|
- S.Push(lua.LString(fid))
|
|
|
|
- atomic.AddInt32(&s.ToDayRequestNum, 1)
|
|
|
|
- atomic.AddInt32(&s.TotalRequestNum, 1)
|
|
|
|
-
|
|
|
|
- end := time.Since(start)
|
|
|
|
- if len(s.FileLastThreeTimes) >= 4 {
|
|
|
|
- s.FileLastThreeTimes = s.FileLastThreeTimes[1:]
|
|
|
|
- }
|
|
|
|
- s.FileLastThreeTimes = append(s.FileLastThreeTimes, end)
|
|
|
|
- return 5
|
|
|
|
- }))
|
|
|
|
- */
|
|
|
|
//下载、上传base64图片
|
|
//下载、上传base64图片
|
|
s.L.SetGlobal("downloadBase64File", s.L.NewFunction(func(S *lua.LState) int {
|
|
s.L.SetGlobal("downloadBase64File", s.L.NewFunction(func(S *lua.LState) int {
|
|
url := S.ToString(-3)
|
|
url := S.ToString(-3)
|