Prechádzať zdrojové kódy

附件下载通用方法更新

maxiaoshan pred 2 rokmi
rodič
commit
ce17190434
4 zmenené súbory, 20 pridaní a 7 odobraní
  1. 1 1
      src/config.json
  2. 14 2
      src/front/spider.go
  3. 3 2
      src/res/util/comm.lua
  4. 2 2
      src/spider/script.go

+ 1 - 1
src/config.json

@@ -73,7 +73,7 @@
     },
     "fileServer": "http://123.56.236.148:9333",
     "jsvmurl": "http://127.0.0.1:8080/jsvm",
-    "renderaddr": "http://8.131.72.226:8998/render.json",
+    "renderaddr": "http://59.110.6.43:8998/render.json",
     "proxyaddr": "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch",
     "proxyauthor": "Basic amlhbnl1MDAxOjEyM3F3ZSFB",
     "luadisablelib": {

+ 14 - 2
src/front/spider.go

@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"log"
 	"mongodb"
+	"regexp"
 	"sort"
 	"spider"
 	"strconv"
@@ -526,7 +527,7 @@ func LuaSaveLog(code, user string, data *map[string]interface{}, stype int) {
 	}
 }
 
-//检查列表页和三级页代码中是否含lua原生方法
+//爬虫保存时,检查列表页和三级页代码中是否含lua原生方法
 func LuaTextCheck(list, detail string, type_list int) (b bool, msg string) {
 	defer qu.Catch()
 	if LuaReg.MatchString(list) || LuaReg.MatchString(detail) {
@@ -537,9 +538,20 @@ func LuaTextCheck(list, detail string, type_list int) (b bool, msg string) {
 		b = true
 		msg = `三级页缺少data["delete"]="true"`
 	}
-	if type_list != 0 && !strings.Contains(list, "sendListNum") { //列表页专家模式且不含sendListNum
+	reg1 := regexp.MustCompile("sendListNum")
+	slIndexArr := reg1.FindAllStringIndex(list, -1)
+	if type_list != 0 && len(slIndexArr) == 0 { //列表页专家模式且不含sendListNum
 		b = true
 		msg = "代码中缺少sendListNum方法;" + msg
+	} else if type_list == 1 && len(slIndexArr) > 0 { //判断sendListNum方法的位置
+		reg2 := regexp.MustCompile("insert")
+		tsIndexArr := reg2.FindAllStringIndex(list, -1)
+		slIndex := slIndexArr[len(slIndexArr)-1]
+		tsIndex := tsIndexArr[len(tsIndexArr)-1]
+		if slIndex[1] < tsIndex[1] { //sendListNum方法必须在table.inset方法后
+			b = true
+			msg = "sendListNum方法位置错误;" + msg
+		}
 	}
 	return
 }

+ 3 - 2
src/res/util/comm.lua

@@ -549,8 +549,9 @@ function common.getFilesLinkByTag(href,tags,content,withend)
 				break
 			end
 		end
-	
-		if statehref~=nil and item["title"]~="" then
+		--有文件类型且在filetype中,才下载附件
+		--if statehref~=nil and item["title"]~="" then
+		if item["title"]~="" then
 			local has = false
 			for _,tmphref in pairs(ahref) do
 				if tmphref == item["href"] then

+ 2 - 2
src/spider/script.go

@@ -319,7 +319,7 @@ func (s *Script) LoadScript(downloadnode, script string, isfile ...string) {
 			url = strings.TrimSpace(url)
 			ret := DownloadFile(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, false, ishttps, "", s.Timeout)
 			qu.Debug(GarbledCodeReg.FindAllString(string(ret), -1), len(ret))
-			if ret == nil || len(ret) < 1024*5 {
+			if ret == nil || len(ret) < 1024*3 {
 				qu.Debug("下载文件出错!")
 			} else {
 				ftype = qu.GetFileType(ret)
@@ -642,7 +642,7 @@ func (s *Script) LoadScript(downloadnode, script string, isfile ...string) {
 
 		name, size, ftype, fid := "", "", "", ""
 		qu.Debug(GarbledCodeReg.FindAllString(string(ret), -1), len(ret))
-		if ret == nil || len(ret) < 1024*5 {
+		if ret == nil || len(ret) < 1024*3 {
 			qu.Debug("下载文件出错!")
 		} else {
 			ftype = qu.GetFileType(ret)