maxiaoshan 3 anos atrás
pai
commit
3eb02c85e5
2 arquivos alterados com 4 adições e 58 exclusões
  1. +1 -54
      src/spider/script.go
  2. +3 -4
      src/spider/util.go

+ 1 - 54
src/spider/script.go

@@ -120,33 +120,7 @@ func (s *Script) LoadScript(code, script_file string, newstate bool) string {
 			charset = s.Encoding
 		}
 		ret := Download(s.Downloader, url, "get", util.GetTable(head), charset, s.Userproxy, ishttps, s.SCode, s.Timeout)
-		//后续代码中完善sendListNum方法后将下方有关心跳代码去除
-		//result := "Ret Null"
-		//ok := false
-		//if ret != "" && ret != "[]" {
-		//	ret = SpaceReg.ReplaceAllString(ret, "")
-		//	tmpArr := []interface{}{}
-		//	tmpMap := map[string]interface{}{}
-		//	if err := json.Unmarshal([]byte(ret), &tmpMap); err == nil && len(tmpMap) >= 2 {
-		//		//result = "Map Ok"
-		//		ok = true
-		//	} else if err := json.Unmarshal([]byte(ret), &tmpArr); err == nil && len(tmpArr) >= 2 {
-		//		//result = "Arr Ok"
-		//		ok = true
-		//	} else if htmlArr := HtmlReg.FindAllString(ret, -1); len(htmlArr) >= 4 {
-		//		//result = "Html Ok"
-		//		ok = true
-		//	} else if textArr := HanReg.FindAllString(ret, -1); len(textArr) > 0 {
-		//		text := strings.Join(textArr, "")
-		//		if len([]rune(text)) >= 50 {
-		//			//result = "Text Ok"
-		//			ok = true
-		//		}
-		//	}
-		//}
-		//if ok {
-		//	UpdateHeart("", "", code, "", "findlist") //记录列表页实际采集数据量心跳
-		//}
+
 		S.Push(lua.LString(ret))
 		atomic.AddInt32(&s.ToDayRequestNum, 1)
 		atomic.AddInt32(&s.TotalRequestNum, 1)
@@ -186,33 +160,6 @@ func (s *Script) LoadScript(code, script_file string, newstate bool) string {
 		} else {
 			ret, retcookie = DownloadAdv(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, charset, s.Userproxy, ishttps, s.SCode, s.Timeout)
 		}
-		//后续代码中完善sendListNum方法后将下方有关心跳代码去除
-		//result := "Ret Null"
-		//ok := false
-		//if ret != "" && ret != "[]" {
-		//	ret = SpaceReg.ReplaceAllString(ret, "")
-		//	tmpArr := []interface{}{}
-		//	tmpMap := map[string]interface{}{}
-		//	if err := json.Unmarshal([]byte(ret), &tmpMap); err == nil && len(tmpMap) >= 2 {
-		//		//result = "Map Ok"
-		//		ok = true
-		//	} else if err := json.Unmarshal([]byte(ret), &tmpArr); err == nil && len(tmpArr) >= 2 {
-		//		//result = "Arr Ok"
-		//		ok = true
-		//	} else if htmlArr := HtmlReg.FindAllString(ret, -1); len(htmlArr) >= 4 {
-		//		//result = "Html Ok"
-		//		ok = true
-		//	} else if textArr := HanReg.FindAllString(ret, -1); len(textArr) > 0 {
-		//		text := strings.Join(textArr, "")
-		//		if len([]rune(text)) >= 50 {
-		//			//result = "Text Ok"
-		//			ok = true
-		//		}
-		//	}
-		//}
-		//if ok {
-		//	UpdateHeart("", "", code, "", "findlist") //记录列表页实际采集数据量心跳
-		//}
 		S.Push(lua.LString(ret))
 		scookie, _ := json.Marshal(retcookie)
 		S.Push(lua.LString(scookie))

+ 3 - 4
src/spider/util.go

@@ -2,12 +2,11 @@ package spider
 
 import (
 	qu "qfw/util"
-	"regexp"
 )
 
-var SpaceReg = regexp.MustCompile("[\\s\u3000\u2003\u00a0]+")
-var HtmlReg = regexp.MustCompile("<[^>]*?>")
-var HanReg = regexp.MustCompile("[\u4e00-\u9fa5]+")
+// var SpaceReg = regexp.MustCompile("[\\s\u3000\u2003\u00a0]+")
+// var HtmlReg = regexp.MustCompile("<[^>]*?>")
+// var HanReg = regexp.MustCompile("[\u4e00-\u9fa5]+")
 
 //初始化延迟采集站点集合
 func InitOther() {