Browse Source

中国招标投标公共服务平台异常附件问题处理

maxiaoshan 2 years ago
parent
commit
3f1c48ec62
4 changed files with 27 additions and 7 deletions
  1. 1 1
      src/spider/handler.go
  2. 23 5
      src/spider/script.go
  3. 1 1
      src/spider/spider.go
  4. 2 0
      src/spider/util.go

+ 1 - 1
src/spider/handler.go

@@ -143,7 +143,7 @@ func NoQueueScript() {
 func NewSpider(code, luafile string) *Spider {
 	defer mu.Catch()
 	spider := &Spider{}
-	err := spider.LoadScript(code, luafile, true)
+	err := spider.LoadScript(&spider.Name, code, luafile, true)
 	if err != "" {
 		return nil
 	}

+ 23 - 5
src/spider/script.go

@@ -71,7 +71,7 @@ func init() {
 var TimeSleepChan = make(chan bool, 1)
 
 //加载文件
-func (s *Script) LoadScript(code, script_file string, newstate bool) string {
+func (s *Script) LoadScript(site *string, code, script_file string, newstate bool) string {
 	defer mu.Catch()
 	s.SCode = code
 	s.ScriptFile = script_file
@@ -549,6 +549,15 @@ func (s *Script) LoadScript(code, script_file string, newstate bool) string {
 				ftype = path.Ext(name)[1:]
 			}
 		}
+		//特殊处理中国招标投标公共服务平台异常附件过滤
+		if *site == "中国招标投标公共服务平台" {
+			if fid != "" && strings.Contains(fid, ErrFid) { //限制访问的附件
+				size, ftype, fid = "", "", "" //信息置空,AnalysisProjectInfo方法将判断数据下载失败重新下载
+			} else if bttype := qu.GetFileType(ret); bttype != "pdf" { //由字节流解析的附件类型不是pdf
+				logger.Info("Error File Type:", bttype, url)
+				size, ftype, fid = "", "", ""
+			}
+		}
 		S.Push(lua.LString(url))
 		S.Push(lua.LString(name))
 		S.Push(lua.LString(size))
@@ -841,6 +850,15 @@ func (s *Script) LoadScript(code, script_file string, newstate bool) string {
 				ftype = path.Ext(name)[1:]
 			}
 		}
+		//特殊处理中国招标投标公共服务平台异常附件过滤
+		if *site == "中国招标投标公共服务平台" {
+			if fid != "" && strings.Contains(fid, ErrFid) { //限制访问的附件
+				size, ftype, fid = "", "", "" //信息置空,AnalysisProjectInfo方法将判断数据下载失败重新下载
+			} else if bttype := qu.GetFileType(ret); bttype != "pdf" { //由字节流解析的附件类型不是pdf
+				logger.Info("Error File Type:", bttype, url)
+				size, ftype, fid = "", "", ""
+			}
+		}
 		S.Push(lua.LString(url))
 		S.Push(lua.LString(name))
 		S.Push(lua.LString(size))
@@ -901,10 +919,10 @@ func getChildrenLen(sq *gq.Selection) (ret int) {
 }
 
 //
-func (s *Script) Reload() {
-	s.L.Close()
-	s.LoadScript(s.SCode, s.ScriptFile, false)
-}
+//func (s *Script) Reload() {
+//	s.L.Close()
+//	s.LoadScript(s.SCode, s.ScriptFile, false)
+//}
 
 //unicode转码
 func transUnic(str string) string {

+ 1 - 1
src/spider/spider.go

@@ -362,7 +362,7 @@ func ReloadScript(code string) {
 			}
 			sp.MUserName = codeInfo["modifyuser"]
 			sp.MUserEmail = codeInfo["modifyemail"]
-			sp.LoadScript(code, sp.ScriptFile, true)
+			sp.LoadScript(&sp.Name, code, sp.ScriptFile, true)
 		}
 		AllspidersMapLock.Unlock()
 	}

+ 2 - 0
src/spider/util.go

@@ -4,6 +4,8 @@ import (
 	qu "qfw/util"
 )
 
+var ErrFid = "a6879f0a8570256aa21fb978e6dabb50429a30dfacff697cf0b898abbc5c262e" // fid substring that marks an access-restricted attachment (checked with strings.Contains in LoadScript)
+
 //初始化延迟采集站点集合
 func InitOther() {
 	defer qu.Catch()