
High-performance mode: concurrent list-page collection

maxiaoshan 1 year ago
parent
commit
5777df76f1
4 changed files with 332 additions and 399 deletions
  1. src/config.json (+7 −7)
  2. src/spider/front.go (+1 −1)
  3. src/spider/handler.go (+33 −277)
  4. src/spider/spider.go (+291 −114)

+ 7 - 7
src/config.json

@@ -11,17 +11,17 @@
     },
     "editoraddr": "http://127.0.0.1:6011/spider/infos",
     "msgname": "爬虫采集平台7100",
-    "msgserveraddr": "spdata.jianyu360.com:801",
+    "msgserveraddr": "spdata.jianyu360.com:803",
     "msgserveraddrfile": "spdata.jianyu360.com:802",
     "msgserveraddrchromedp": "spdata.jianyu360.com:807",
 	"isdelay":false,
     "working": 1,
     "chansize": 4,
     "detailchansize": 20,
-    "uploadevent": 7400,
+    "uploadevent": 7100,
     "logLevel": 1,
     "daynum": 6,
-    "modal": 1,
+    "modal": 0,
     "ishistoryevent": false,
     "threadbasenum": 50,
     "threadupperlimit": 10,
@@ -29,7 +29,7 @@
     "jsserveraddress":  "127.0.0.1:8031",
     "tesseractadd": "http://test.qmx.top:1688",
     "testdir": "res/test/spider_test.lua",
-    "redisservers": "list=192.168.3.207:1779",
+    "redisservers": "list=192.168.3.166:1579",
     "bloomredisservers": "href=192.168.3.207:1679",
     "word":{
     	"keyword":"(抽签|中标|招标|成交|合同|中标候选人|资格预审|拟建|邀请|询价|比选|议价|竞价|磋商|采购|招投标|答疑|变更公告|更正公告|竞争性谈判|竞谈|意见征询|澄清|单一来源|流标|废标|验收公告|中止|终止|违规|处罚|征集公告|开标结果|评审结果|监理|招租|租赁|评判结果|项目|遴选|补遗|竞标|征求意见)",
@@ -42,11 +42,11 @@
         "ossBucketName":"jy-editor"
     },
     "pageturninfo": {
-        "repeatpagetimeslimit_w0": 10,
+        "repeatpagetimeslimit_w0": 3,
         "repeatpagetimeslimit_w1": 3,
-        "turnpagemaxlimit_w0": 100,
+        "turnpagemaxlimit_w0": 4,
         "turnpagemaxlimit_w1": 50,
-        "nextpagemaxlimit_w0": 100,
+        "nextpagemaxlimit_w0": 5,
         "nextpagemaxlimit_w1": 50,
         "listparalleltasklimit": 3
     },
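
The tightened limits above feed the new concurrent list-page loop in spider.go (see DownListPageItemByThreads below): the _w0 values apply to high-performance mode (working=0), the _w1 values to queue mode (working=1), and repeatpagetimeslimit_* doubles as the size of the page-fetching worker pool. A minimal sketch of how this block might deserialize in Go — the json tags mirror the config keys, but the struct shape and field names are assumptions inferred from the util.Config.PageTurnInfo accesses later in this diff:

type PageTurnInfo struct {
	RepeatPageTimesLimitW0 int `json:"repeatpagetimeslimit_w0"` // consecutive fully-duplicated pages tolerated (high-performance mode); also sizes the worker pool
	RepeatPageTimesLimitW1 int `json:"repeatpagetimeslimit_w1"` // same limit for queue mode
	TurnPageMaxLimitW0     int `json:"turnpagemaxlimit_w0"`     // page cap in high-performance mode
	TurnPageMaxLimitW1     int `json:"turnpagemaxlimit_w1"`     // page cap in queue mode
	NextPageMaxLimitW0     int `json:"nextpagemaxlimit_w0"`     // extra pages a child task may crawl (high-performance mode)
	NextPageMaxLimitW1     int `json:"nextpagemaxlimit_w1"`     // extra pages a child task may crawl (queue mode)
	ListParallelTaskLimit  int `json:"listparalleltasklimit"`   // cap on parallel list subtasks per spider
}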

+ 1 - 1
src/spider/front.go

@@ -657,7 +657,7 @@ func getSpiders(code, state string, currPage int64) []interface{} {
 		spider["targetChannelUrl"] = v.TargetChannelUrl
 		spider["lowlimit"] = v.LowerLimit
 		spider["uplimit"] = v.UpperLimit
-		spider["userName"] = v.UserName
+		spider["userName"] = v.MUserName
 		if state == "abnormal" {
 			if v.YesterdayDowncount == 0 {
 				if !v.Stop {

+ 33 - 277
src/spider/handler.go

@@ -24,9 +24,9 @@ import (
 
 var SpiderHeart sync.Map = sync.Map{} //spider heartbeats
 
-var Allspiders sync.Map = sync.Map{}  //the set of spiders collecting list pages
-var Allspiders2 sync.Map = sync.Map{} //the set of spiders collecting detail pages
-var LoopListPath sync.Map = sync.Map{}
+var Allspiders sync.Map = sync.Map{}   //spiders currently running list-page collection tasks
+var Allspiders2 sync.Map = sync.Map{}  //spiders currently running detail-page collection tasks
+var LoopListPath sync.Map = sync.Map{} //the full spider set
 
 //var ChanDels = map[int]string{}
 //var lock sync.Mutex
@@ -72,31 +72,17 @@ func NoQueueScript() {
 			if errstr == "" && sp != nil && sp.Code != "nil" { //script loaded successfully
 				//sp.Index = qu.IntAll(key)
 				//sp2.Index = qu.IntAll(key)
-				if info["createuser"] != "" {
-					sp.UserName = info["createuser"]
-				}
-				if info["createuseremail"] != "" {
-					sp.UserEmail = info["createuseremail"]
-				}
-				sp.MUserName = info["modifyuser"]
-				sp.MUserEmail = info["modifyemail"]
 				Allspiders.Store(sp.Code, sp)
 				for _, tmp := range *list {
 					if qu.ObjToString(tmp["code"]) == sp.Code {
 						sp.UpperLimit = qu.IntAll(tmp["uplimit"])
-						//sp2.UpperLimit = qu.IntAll(tmp["uplimit"])
 						sp.LowerLimit = qu.IntAll(tmp["lowlimit"])
-						//sp2.LowerLimit = qu.IntAll(tmp["lowlimit"])
 						break
 					}
 				}
 
-				if util.Config.Modal == 1 { //separate collection of list pages and detail pages
+				if util.Config.Modal == 1 && !util.Config.IsHistoryEvent { //separate collection of list pages and detail pages
 					sp2, _ := CreateSpider(code, script, true, false)
-					sp2.UserName = sp.UserName
-					sp2.UserEmail = sp.UserEmail
-					sp2.MUserName = sp.MUserName
-					sp2.MUserEmail = sp.MUserEmail
+					sp2.IsMainThread = true //used during multi-threaded collection
 					Allspiders2.Store(sp.Code, sp2)
 				}
@@ -173,16 +159,12 @@ func QueueUpScriptList() {
 						}
 					}
 				}
-				logger.Info("Code:", code, "Is Downloading List:", old_is_running, "task num:", sp_old.ListParallelTaskNum)
+				logger.Info("Code:", code, "Is Downloading List:", old_is_running, ",subtask num:", sp_old.ListParallelTaskNum)
 				if !old_is_running { //check whether the spider's previous round has finished
 					sp, errstr := CreateSpider(code, script, false, false)
 					//logger.Info("script init succeeded:", sp != nil, e.Value)
 					if errstr == "" && sp != nil && sp.Code != "nil" { //script initialized successfully
 						//sp.Index = qu.IntAll(key)
-						sp.UserName = info["createuser"]
-						sp.UserEmail = info["createuseremail"]
-						sp.MUserName = info["modifyuser"]
-						sp.MUserEmail = info["modifyemail"]
 						sp.ListParallelTaskNum = sp_old.ListParallelTaskNum //inherit the subtask count
 						Allspiders.Store(code, sp)
 						sp.StartJob()
@@ -223,10 +205,6 @@ func QueueUpScriptList() {
 					if errstr == "" && spTmp != nil && spTmp.Code != "nil" { //script initialized successfully
 						sp_old.ListParallelTaskNum++
 						logger.Info(code, "subtask started; current subtask count:", sp_old.ListParallelTaskNum)
-						spTmp.UserName = info["createuser"]
-						spTmp.UserEmail = info["createuseremail"]
-						spTmp.MUserName = info["modifyuser"]
-						spTmp.MUserEmail = info["modifyemail"]
 						//start the download
 						go func(spt, spo *Spider) {
 							defer func() {
@@ -297,10 +275,6 @@ func QueueUpScriptDetail() {
 					sp, errstr := CreateSpider(code, script, true, false)
 					if errstr == "" && sp != nil && sp.Code != "nil" { //script initialized successfully
 						//sp.Index = qu.IntAll(key)
-						sp.UserName = info["createuser"]
-						sp.UserEmail = info["createuseremail"]
-						sp.MUserName = info["modifyuser"]
-						sp.MUserEmail = info["modifyemail"]
 						sp.IsMainThread = true
 						Allspiders2.Store(code, sp)
 						go sp.DownloadListDetail(false) //download detail-page info
@@ -454,24 +428,6 @@ func UpdateSpiderByCodeState(code, state string) (bool, error) {
 					logger.Info("script taken offline; removed from Allspiders")
 				}
 			}
-			//LoopListPath.Range(func(k, v interface{}) bool {
-			//	//if v != nil {
-			//	//	info, _ := v.(map[string]string)
-			//	//	if info["code"] == code {
-			//	//		LoopListPath.Store(k, nil)
-			//	//		lock.Lock()
-			//	//		defer lock.Unlock()
-			//	//		ChanDels[qu.IntAll(k)] = code
-			//	//		logger.Info("script taken offline; LoopListPath set to nil, position in ChanDels:", k)
-			//	//	}
-			//	//}
-			//	if k == code {
-			//		LoopListPath.Delete(k)
-			//		logger.Info(code, "script taken offline successfully")
-			//		return false //break out of the loop
-			//	}
-			//	return true
-			//})
 		} else { //high-performance mode
 			for _, as := range []sync.Map{Allspiders, Allspiders2} {
 				if tmp, ok := as.Load(code); ok {
@@ -528,12 +484,6 @@ func UpdateSpiderByCodeState(code, state string) (bool, error) {
 				if spd, ok := Allspiders.Load(k); ok { //the script already exists; update it
 					sp := spd.(*Spider)
 					sp.ScriptFile = v["script"]
-					if v["createuser"] != "" {
-						sp.UserName = v["createuser"]
-					}
-					if v["createuseremail"] != "" {
-						sp.UserEmail = v["createuseremail"]
-					}
 					sp.MUserName = v["modifyuser"]
 					sp.MUserEmail = v["modifyemail"]
 					Allspiders.Store(k, sp)
@@ -549,12 +499,6 @@ func UpdateSpiderByCodeState(code, state string) (bool, error) {
 				if spd2, ok2 := Allspiders2.Load(k); ok2 { //the script already exists; update it
 					sp2 := spd2.(*Spider)
 					sp2.ScriptFile = v["script"]
-					if v["createuser"] != "" {
-						sp2.UserName = v["createuser"]
-					}
-					if v["createuseremail"] != "" {
-						sp2.UserEmail = v["createuseremail"]
-					}
 					sp2.MUserName = v["modifyuser"]
 					sp2.MUserEmail = v["modifyemail"]
 					sp2.LoadScript(&sp2.Name, &sp2.Channel, &sp2.MUserName, k, sp2.ScriptFile, true, false) //re-listed after update; reload the script
@@ -572,69 +516,31 @@ func UpdateSpiderByCodeState(code, state string) (bool, error) {
 	} else { //script listing (bring online)
 		scriptMap := getSpiderScriptDB(code)
 		logger.Info("listing new script; already present in DB:", code, len(scriptMap) > 0, scriptMap[code] != nil)
-		if util.Config.Modal == 1 { //separate collection
+		if util.Config.Modal == 1 && !util.Config.IsHistoryEvent { //separate collection
 			go UpdateHighListDataByCode(code)
 		}
 		if util.Config.Working == 1 { //queue mode
 			for _, v := range scriptMap {
-				listsize := 0
-				listHas := false
-				count_ok, count_no := 0, 0
+				LoopListPath.Store(code, v) //update or add the spider info
+				listsize, count_ok, count_no := 0, 0, 0
+				isOk := false
 				LoopListPath.Range(func(key, val interface{}) bool {
 					listsize++
-					if tmp, ok := val.(map[string]string); ok { //this check only computes count_ok; key == code could be tested directly
+					if tmp, ok := val.(map[string]string); ok {
 						count_ok++
-						if tmp["code"] == code && code == key { //already in the queue; reload the script
-							logger.Info("listing new script: already in queue, updating")
-							listHas = true
-							LoopListPath.Store(key, v)
-							logger.Info("queue mode: updated list-page info state", code)
+						if tmp["code"] == code && key == code { //present in the queue
+							isOk = true
 						}
 					} else {
 						count_no++
 					}
 					return true
 				})
-				logger.Info("listing new script; spiders in queue:", listsize, "online:", count_ok, "offline:", count_no)
-				if !listHas { //not in the queue; add it
-					logger.Info("listing new script: not yet in queue")
-					LoopListPath.Store(code, v) //list it
-					// lock.Lock()
-					// defer lock.Unlock()
-					// if len(ChanDels) > 0 {
-					// 	for i, _ := range ChanDels {
-					// 		logger.Info("listing new script; filling queue slot", i)
-					// 		LoopListPath.Store(i, v)
-					// 		delete(ChanDels, i)
-					// 		break
-					// 	}
-					// } else {
-					// 	logger.Info("listing new script; appending at queue position", listsize)
-					// 	LoopListPath.Store(listsize, v) //list it
-					// }
-					//verify the listing succeeded
-					saveList := false //records whether the listing succeeded
-					listsize, count_ok, count_no = 0, 0, 0
-					LoopListPath.Range(func(key, val interface{}) bool {
-						listsize++
-						if tmp, ok := val.(map[string]string); ok {
-							count_ok++
-							if tmp["code"] == code && key == code { //present in the queue
-								saveList = true
-								logger.Info("script listed successfully", code)
-							}
-						} else {
-							count_no++
-						}
-						return true
-					})
-					logger.Info("after listing, spiders in queue:", listsize, "online:", count_ok, "offline:", count_no)
-					if !saveList { //listing failed
-						logger.Info("listing script", code, "	failed")
-						return false, errors.New("use " + code + " failed")
-					}
+				logger.Info("script listed:", isOk, code)
+				logger.Info("after listing, spiders in queue:", listsize, "online:", count_ok, "offline:", count_no)
+				if !isOk {
+					return false, errors.New("use " + code + " failed")
 				}
-				logger.Info("listing new script", code)
 				up = true
 			}
 		} else { //high-performance mode
@@ -644,10 +550,6 @@ func UpdateSpiderByCodeState(code, state string) (bool, error) {
 				if spd, ok := Allspiders.Load(k); ok { //the script already exists; update it
 					sp := spd.(*Spider)
 					sp.ScriptFile = v["script"]
-					sp.UserName = v["createuser"]
-					sp.UserEmail = v["createuseremail"]
-					sp.MUserName = v["modifyuser"]
-					sp.MUserEmail = v["modifyemail"]
 					UpdateSpider(sp, k, v["script"]) //update the spider's other info
 					//sp.LoadScript(&sp.Name, &sp.Channel, &sp.MUserName, k, sp.ScriptFile, true, false) //re-list after update; reload the script
 					Allspiders.Store(k, sp)
@@ -657,10 +559,6 @@ func UpdateSpiderByCodeState(code, state string) (bool, error) {
 				} else { //new script
 					sp, errstr := CreateSpider(k, v["script"], true, false)
 					if errstr == "" && sp != nil && sp.Code != "nil" {
-						sp.UserName = v["createuser"]
-						sp.UserEmail = v["createuseremail"]
-						sp.MUserName = v["modifyuser"]
-						sp.MUserEmail = v["modifyemail"]
 						Allspiders.Store(k, sp)
 						sp.StartJob()
 						up = true
@@ -691,30 +589,21 @@ func UpdateSpiderByCodeState(code, state string) (bool, error) {
 					}
 				}
 				//2. Allspiders2 handles listing on nodes 7100/7110/7400 for detail-page collection (Allspiders2 is the detail-page spider set)
-				if util.Config.Modal == 1 {
+				if util.Config.Modal == 1 && !util.Config.IsHistoryEvent {
 					//Allspiders2
 					if spd2, ok2 := Allspiders2.Load(k); ok2 { //the script already exists; update it
 						sp2 := spd2.(*Spider)
 						sp2.ScriptFile = v["script"]
-						sp2.UserName = v["createuser"]
-						sp2.UserEmail = v["createuseremail"]
-						sp2.MUserName = v["modifyuser"]
-						sp2.MUserEmail = v["modifyemail"]
 						UpdateSpider(sp2, k, v["script"])                                                       //update the spider's other info
 						sp2.LoadScript(&sp2.Name, &sp2.Channel, &sp2.MUserName, k, sp2.ScriptFile, true, false) //re-listed after update; reload the script
 						Allspiders2.Store(k, sp2)                                                               //put back into the set after reload
-						UpdateHighListDataByCode(k)                                                             //after re-listing, reset data to state=0
 						// up = true
 						// err = nil
 						logger.Info("Allspiders2: script reloaded on listing", sp2.Code)
 					} else { //new script
 						sp2, errstr := CreateSpider(k, v["script"], true, false)
 						if errstr == "" && sp2 != nil && sp2.Code != "nil" {
-							sp2.UserName = v["createuser"]
-							sp2.UserEmail = v["createuseremail"]
-							sp2.MUserName = v["modifyuser"]
-							sp2.MUserEmail = v["modifyemail"]
-							sp2.IsMainThread = true         //used during multi-threaded collection
+							sp2.IsMainThread = true         //used during multi-threaded detail-page collection
 							go sp2.DownloadHighDetail(true) //download detail pages based on list-page data
 							Allspiders2.Store(k, sp2)
 							// up = true
@@ -752,12 +641,6 @@ func ReloadSpiderFile() {
 				sp := spd.(*Spider)
 				logger.Info("scheduled script reload", sp.Code)
 				sp.ScriptFile = v["script"]
-				if v["createuser"] != "" {
-					sp.UserName = v["createuser"]
-				}
-				if v["createuseremail"] != "" {
-					sp.UserEmail = v["createuseremail"]
-				}
 				sp.MUserName = v["modifyuser"]
 				sp.MUserEmail = v["modifyemail"]
 				as.Store(k, sp)
@@ -780,12 +663,6 @@ func ReloadSpiderFile() {
 					sp, errstr = CreateSpider(k, v["script"], true, false)
 				}
 				if errstr == "" && sp != nil && sp.Code != "nil" {
-					if v["createuser"] != "" {
-						sp.UserName = v["createuser"]
-					}
-					if v["createuseremail"] != "" {
-						sp.UserEmail = v["createuseremail"]
-					}
 					sp.MUserName = v["modifyuser"]
 					sp.MUserEmail = v["modifyemail"]
 					as.Store(k, sp)
@@ -935,9 +812,12 @@ func CreateSpider(code, luafile string, newstate, thread bool) (*Spider, string)
 		spider.Timeout = int64(spiderTimeout)
 	}
 	spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
-	spider.UserName = spider.GetVar("spiderUserName")
-	spider.UserEmail = spider.GetVar("spiderUserEmail")
-	spider.UploadTime = spider.GetVar("spiderUploadTime")
+	//spider.UserName = spider.GetVar("spiderUserName")
+	//spider.UserEmail = spider.GetVar("spiderUserEmail")
+	//spider.UploadTime = spider.GetVar("spiderUploadTime")
+	spider.MUserName = spider.GetVar("spiderUserName")
+	spider.MUserEmail = spider.GetVar("spiderUserEmail")
+
 	//historical backfill
 	//qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
 	spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
@@ -990,7 +870,8 @@ func UpdateSpider(spider *Spider, code, script string) {
 	} else {
 		spider.Timeout = int64(spiderTimeout)
 	}
-
+	spider.MUserName = spider.GetVar("spiderUserName")
+	spider.MUserEmail = spider.GetVar("spiderUserEmail")
 	spider.TargetChannelUrl = ts.GetVar("spiderTargetChannelUrl") //channel URL
 	//historical backfill
 	spider.IsHistoricalMend = ts.GetBoolVar("spiderIsHistoricalMend")
@@ -1001,133 +882,6 @@ func UpdateSpider(spider *Spider, code, script string) {
 	spider.Infoformat = spider.GetIntVar("spiderInfoformat")
 }
 
-//create a spider (queue mode)
-func NewSpider_New(code, luafile string, newstate bool) (*Spider, string) {
-	defer qu.Catch()
-	spider := &Spider{}
-	err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, newstate, false)
-	if err != "" {
-		return nil, err
-	}
-	spider.Code = spider.GetVar("spiderCode")
-	spider.Script.SCode = spider.Code
-	spider.Name = spider.GetVar("spiderName")
-	spider.Channel = spider.GetVar("spiderChannel")
-	//spider.LastExecTime = GetLastExectime(spider.Code)
-	spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
-	spider.Collection = spider.GetVar("spider2Collection")
-	spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
-	spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
-	spider.StoreMode = spider.GetIntVar("spiderStoreMode")
-	spider.CoverAttr = spider.GetVar("spiderCoverAttr")
-	spiderSleepBase := spider.GetIntVar("spiderSleepBase")
-	if spiderSleepBase == -1 {
-		spider.SleepBase = 1000
-	} else {
-		spider.SleepBase = spiderSleepBase
-	}
-	spiderSleepRand := spider.GetIntVar("spiderSleepRand")
-	if spiderSleepRand == -1 {
-		spider.SleepRand = 1000
-	} else {
-		spider.SleepRand = spiderSleepRand
-	}
-	spiderTimeout := spider.GetIntVar("spiderTimeout")
-	if spiderTimeout == -1 {
-		spider.Timeout = 60
-	} else {
-		spider.Timeout = int64(spiderTimeout)
-	}
-	spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
-	if v, ok := Allspiders.Load(spider.Code); ok {
-		sp := v.(*Spider)
-		spider.TodayDowncount = sp.TodayDowncount
-		spider.ToDayRequestNum = sp.ToDayRequestNum
-		spider.YesterdayDowncount = sp.YesterdayDowncount
-		spider.YestoDayRequestNum = sp.YestoDayRequestNum
-		spider.TotalDowncount = sp.TotalDowncount
-		spider.TotalRequestNum = sp.TotalRequestNum
-		spider.ErrorNum = sp.ErrorNum
-		spider.RoundCount = sp.RoundCount
-	}
-	spider.UserName = spider.GetVar("spiderUserName")
-	spider.UserEmail = spider.GetVar("spiderUserEmail")
-	spider.UploadTime = spider.GetVar("spiderUploadTime")
-
-	//historical backfill
-	spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
-	spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
-	//new vs. old spider
-	spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
-	//spider type
-	spider.Infoformat = spider.GetIntVar("spiderInfoformat")
-	return spider, ""
-}
-
-//create a spider (high-performance mode)
-func NewSpider(code, luafile string) (*Spider, string) {
-	defer qu.Catch()
-	spider := &Spider{}
-	err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, true, false)
-	if err != "" {
-		return nil, err
-	}
-	spider.Code = spider.GetVar("spiderCode")
-	spider.SCode = spider.Code
-	spider.Name = spider.GetVar("spiderName")
-	spider.Channel = spider.GetVar("spiderChannel")
-
-	//spider.LastExecTime = GetLastExectime(spider.Code)
-	spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
-	spider.Collection = spider.GetVar("spider2Collection")
-	spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
-	//spider.Thread = int64(spider.GetIntVar("spiderThread"))
-	spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
-	spider.StoreMode = spider.GetIntVar("spiderStoreMode")
-	spider.CoverAttr = spider.GetVar("spiderCoverAttr")
-	spiderSleepBase := spider.GetIntVar("spiderSleepBase")
-	if spiderSleepBase == -1 {
-		spider.SleepBase = 1000
-	} else {
-		spider.SleepBase = spiderSleepBase
-	}
-	spiderSleepRand := spider.GetIntVar("spiderSleepRand")
-	if spiderSleepRand == -1 {
-		spider.SleepRand = 1000
-	} else {
-		spider.SleepRand = spiderSleepRand
-	}
-	spiderTimeout := spider.GetIntVar("spiderTimeout")
-	if spiderTimeout == -1 {
-		spider.Timeout = 60
-	} else {
-		spider.Timeout = int64(spiderTimeout)
-	}
-	spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
-	date := time.Unix(time.Now().Unix(), 0).Format(qu.Date_Short_Layout)
-	tmp := GetDownloadLast(spider.Code, date) //
-	if len(tmp) > 0 {
-		spider.TodayDowncount = int32(qu.IntAll(tmp["todaydowncount"]))
-		spider.ToDayRequestNum = int32(qu.IntAll(tmp["todaydownreq"]))
-		spider.YesterdayDowncount = int32(qu.IntAll(tmp["yesdowncount"]))
-		spider.YestoDayRequestNum = int32(qu.IntAll(tmp["yesdownreq"]))
-		spider.TotalDowncount = spider.TodayDowncount + int32(qu.IntAll(tmp["totaldown"]))
-		spider.TotalRequestNum = spider.ToDayRequestNum + int32(qu.IntAll(tmp["totalreq"]))
-	}
-	spider.UserName = spider.GetVar("spiderUserName")
-	spider.UserEmail = spider.GetVar("spiderUserEmail")
-	spider.UploadTime = spider.GetVar("spiderUploadTime")
-	//historical backfill
-	//qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
-	spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
-	spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
-	//new vs. old spider
-	spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
-	//spider type
-	spider.Infoformat = spider.GetIntVar("spiderInfoformat")
-	return spider, ""
-}
-
 //create a spider for multi-threaded use
 func NewSpiderForThread(code, luafile string) (*Spider, string) {
 	defer qu.Catch()
@@ -1170,9 +924,9 @@ func NewSpiderForThread(code, luafile string) (*Spider, string) {
 	}
 	spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
 
-	spider.UserName = spider.GetVar("spiderUserName")
-	spider.UserEmail = spider.GetVar("spiderUserEmail")
-	spider.UploadTime = spider.GetVar("spiderUploadTime")
+	//spider.UserName = spider.GetVar("spiderUserName")
+	//spider.UserEmail = spider.GetVar("spiderUserEmail")
+	//spider.UploadTime = spider.GetVar("spiderUploadTime")
 	//historical backfill
 	//qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
 	spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
@@ -1242,13 +996,15 @@ func GetScriptByTmp(luaconfig map[string]interface{}) string {
 	if luaconfig["contentcheck"] == nil {
 		luaconfig["contentcheck"] = ""
 	}
+	modifyUser := qu.ObjToString(luaconfig["modifyuser"])
+	modifyUserEmail := qu.ObjToString(luaconfig["createuseremail"])
 	if luaconfig != nil && len(luaconfig) > 0 {
 		common := luaconfig["param_common"].([]interface{})
 		//append spiderIsHistoricalMend and spiderIsMustDownload
 		if len(common) == 15 {
-			common = append(common, "", "", "")
+			common = append(common, modifyUser, modifyUserEmail, "")
 		} else {
-			common = append(common, false, false, "", "", "")
+			common = append(common, false, false, modifyUser, modifyUserEmail, "")
 		}
 		for k, v := range common {
 			if k == 4 || k == 5 || k == 6 || k == 9 || k == 10 {
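
The handler.go changes above retire the per-spider developer identity (UserName/UserEmail/UploadTime) in favor of a single maintainer identity: GetScriptByTmp now writes modifyuser into the generated script's param_common slots, and CreateSpider/UpdateSpider read it back out of the spiderUserName/spiderUserEmail Lua globals into MUserName/MUserEmail. (Note that modifyUserEmail is populated from the createuseremail key rather than modifyemail; the diff does not indicate whether that is intentional.) A minimal gopher-lua sketch of reading such a global — getStringGlobal is a hypothetical stand-in for the Script.GetVar helper, whose real implementation is outside this diff:

package main

import (
	lua "github.com/yuin/gopher-lua"
)

// getStringGlobal reads a string global such as "spiderUserName"
// from the embedded Lua state; non-string values yield "".
func getStringGlobal(L *lua.LState, name string) string {
	if lv, ok := L.GetGlobal(name).(lua.LString); ok {
		return string(lv)
	}
	return ""
}

func main() {
	L := lua.NewState()
	defer L.Close()
	// The generated script now carries the maintainer in this global.
	if err := L.DoString(`spiderUserName = "maxiaoshan"`); err != nil {
		panic(err)
	}
	println(getStringGlobal(L, "spiderUserName")) // maxiaoshan
}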

+ 291 - 114
src/spider/spider.go

@@ -49,33 +49,33 @@ type SpiderFlow struct {
 // Spider is the crawler type
 type Spider struct {
 	Script
-	Code                            string //spider code
-	Name                            string //name
-	Channel                         string //site
-	DownDetail                      bool   //whether to download detail pages
-	Stop                            bool   //stop flag
-	Pass                            bool   //pause flag
-	LastPubshTime                   int64  //last publish time
-	LastHeartbeat                   int64  //last heartbeat time
-	SpiderRunRate                   int64  //run frequency
-	ExecuteOkTime                   int64  //time the task succeeded/finished
-	Collection                      string //destination collection name
-	Thread                          int64  //thread count
-	LastExecTime                    int64  //last execution time
-	LastDowncount                   int32  //download count of the last run
-	TodayDowncount                  int32  //today's download count
-	YesterdayDowncount              int32  //yesterday's download count
-	TotalDowncount                  int32  //total download count
-	RoundCount                      int32  //round counter
-	StoreMode                       int    //storage mode
-	StoreToMsgEvent                 int    //message type
-	CoverAttr                       string //attribute used for deduplication
-	SleepBase                       int    //base delay
-	SleepRand                       int    //random delay
-	TargetChannelUrl                string //channel page URL
-	UpperLimit, LowerLimit          int    //normal-range upper and lower bounds
-	UserName, UserEmail, UploadTime string //developer name, developer email, script upload time
-	MUserName, MUserEmail           string //maintainer, maintainer email
+	Code                   string //spider code
+	Name                   string //name
+	Channel                string //site
+	DownDetail             bool   //whether to download detail pages
+	Stop                   bool   //stop flag
+	Pass                   bool   //pause flag
+	LastPubshTime          int64  //last publish time
+	LastHeartbeat          int64  //last heartbeat time
+	SpiderRunRate          int64  //run frequency
+	ExecuteOkTime          int64  //time the task succeeded/finished
+	Collection             string //destination collection name
+	Thread                 int64  //thread count
+	LastExecTime           int64  //last execution time
+	LastDowncount          int32  //download count of the last run
+	TodayDowncount         int32  //today's download count
+	YesterdayDowncount     int32  //yesterday's download count
+	TotalDowncount         int32  //total download count
+	RoundCount             int32  //round counter
+	StoreMode              int    //storage mode
+	StoreToMsgEvent        int    //message type
+	CoverAttr              string //attribute used for deduplication
+	SleepBase              int    //base delay
+	SleepRand              int    //random delay
+	TargetChannelUrl       string //channel page URL
+	UpperLimit, LowerLimit int    //normal-range upper and lower bounds
+	//UserName, UserEmail, UploadTime string //developer name, developer email, script upload time
+	MUserName, MUserEmail string //maintainer, maintainer email
 	//Index                           int    //array index
 	//historical backfill
 	IsHistoricalMend    bool //whether this is a historical-backfill spider
@@ -96,19 +96,11 @@ var (
 	TimeChan          = make(chan bool, 1)
 	Reg               = regexp.MustCompile(`(http|https)://([\w]+\.)+[\w]+(/?)`)
 	RestrictAccessReg = regexp.MustCompile(`访问被拒绝`)
-	//DomainNameReg     = regexp.MustCompile(`(?://).+?(?:)[::/]`)
-	//RepDomainNameReg  = regexp.MustCompile(`[::/]+`)
-	//Today             string
-	//SpiderFlowMap     = sync.Map{} //code:{"2022-05-16":SpiderFlow}
-	AllThreadNum int64
-	DelaySiteMap map[string]*DelaySite //delayed-collection site set
-
-	//UpdataMgoCache = make(chan []map[string]interface{}, 1000)   //update the state of data to be re-downloaded
-	//SP = make(chan bool, 5)
-	//SaveMgoCache = make(chan map[string]interface{}, 1000)       //save data collected from other sites
-	//SPS = make(chan bool, 5)
-	UpdataHeartCache = make(chan []map[string]interface{}, 1000) //update spider heartbeat info
-	SPH              = make(chan bool, 5)
+	AllThreadNum      int64
+	ListAllThreadNum  int64
+	DelaySiteMap      map[string]*DelaySite                       //delayed-collection site set
+	UpdataHeartCache  = make(chan []map[string]interface{}, 1000) //update spider heartbeat info
+	SPH               = make(chan bool, 5)
 
 	DataBakSaveCache = make(chan map[string]interface{}, 1000) //save detail-page records of collected info
 	DB_CH            = make(chan bool, 5)
@@ -155,7 +147,13 @@ func (s *Spider) ExecJob(reload bool) {
 	//if err != nil {
 	//	logger.Error(s.Code, err)
 	//}
-	err := s.DownListPageItem() //download the list pages
+	//decide whether to download list pages with high concurrency
+	var err interface{}
+	if util.Config.Working == 0 && util.Config.Modal == 1 && !util.Config.IsHistoryEvent {
+		err = s.DownListPageItemByThreads() //download the list pages
+	} else {
+		err = s.DownListPageItem() //download the list pages
+	}
 	if err != nil {
 		logger.Error(s.Code, err)
 	}
@@ -228,7 +226,6 @@ func (s *Spider) GetLastPublishTime() (errs interface{}) {
 //download the list pages
 func (s *Spider) DownListPageItem() (errs interface{}) {
 	defer qu.Catch()
-	s.AlreadyGetPageHeart = map[int]bool{}                                     //reset the record
 	start, max := s.GetIntVar("spiderStartPage"), s.GetIntVar("spiderMaxPage") //start page, max page
 	s.MaxPage = max                                                            //
 	//tmpMax := max                                                              //temporarily record the max page
@@ -283,7 +280,6 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 			if downtimes < 2 {
 				downtimes++
 				start--
-				//} else if start > tmpMax && isRunRepeatList { //retries exhausted; treat this page as duplicate
 			} else if isRunRepeatList { //retries exhausted; treat this page as duplicate
 				if repeatPageNum+1 == start {
 					repeatPageTimes++ //increment the counter
@@ -298,14 +294,13 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 		lv := s.L.Get(-1)
 		s.L.Pop(1)
 		if tbl, ok := lv.(*lua.LTable); ok {
-			list := []map[string]interface{}{}
+			//list := []map[string]interface{}{}
 			//qu.Debug("items on the current page:", tbl.Len())
 			if tabLen := tbl.Len(); tabLen > 0 { //the list page has data; download detail pages from the list info
 				repeatListNum := 0 // number of duplicate links on the current list page
 				for i := 1; i <= tabLen; i++ {
 					v := tbl.RawGetInt(i).(*lua.LTable)
 					tmp := util.TableToMap(v)
-					//s.ThisSiteData(tmp)      //track whether the downloaded data belongs to this site
 					if !s.IsHistoricalMend { //not historical backfill
 						tmp["dataging"] = 0 //mark the data with dataging=0
 						if s.DownDetail {
@@ -326,11 +321,8 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 						s.HistoricalMendDownloadDetailItem(tmp) //historical backfill: download detail pages
 					}
 				}
-				//if start <= tmpMax { //assign counts
 				repeatAllNum += repeatListNum
 				downloadAllNum += tabLen
-				//}
-				//if start > tmpMax && isRunRepeatList { //run consecutive-page dedup
 				if isRunRepeatList { //run consecutive-page dedup
 					if repeatListNum >= tabLen { //every item on the current list page was already collected
 						//qu.Debug("duplicate page:", repeatPageNum, "current page:", start)
@@ -345,17 +337,16 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 						repeatPageNum = 0
 					}
 				}
-				if !s.IsHistoricalMend && !s.DownDetail {
-					if len(list) > 0 { //save the info to the DB
-						StoreBlak(s.StoreMode, s.StoreToMsgEvent, s.Collection, s.CoverAttr, list)
-					}
-				}
+				//if !s.IsHistoricalMend && !s.DownDetail {
+				//	if len(list) > 0 { //save the info to the DB
+				//		StoreBlak(s.StoreMode, s.StoreToMsgEvent, s.Collection, s.CoverAttr, list)
+				//	}
+				//}
 			} else { //the page may be empty due to network problems; re-request the list page
 				if downtimes < 2 {
 					downtimes++
 					start--
 					continue
-					//} else if start > tmpMax && isRunRepeatList { //retries exhausted; treat this page as duplicate
 				} else if isRunRepeatList { //retries exhausted; treat this page as duplicate
 					if repeatPageNum+1 == start {
 						repeatPageTimes++ //increment the counter
@@ -370,7 +361,6 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 				downtimes++
 				start--
 				continue
-				//} else if start > tmpMax && isRunRepeatList { //retries exhausted; treat this page as duplicate
 			} else if isRunRepeatList { //retries exhausted; treat this page as duplicate
 				if repeatPageNum+1 == start {
 					repeatPageTimes++ //increment the counter
@@ -403,18 +393,20 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 			"alltimes": 1,
 		}
 		//record whether page turning succeeded
-		if s.PageOneTextHash != "" {
-			if s.PageTwoTextHash != "" {
-				if s.PageOneTextHash != s.PageTwoTextHash {
-					inc["page_success"] = 1
+		if s.MaxPage > 1 {
+			if s.PageOneTextHash != "" {
+				if s.PageTwoTextHash != "" {
+					if s.PageOneTextHash != s.PageTwoTextHash {
+						inc["page_success"] = 1
+					} else {
+						inc["page_fail"] = 1
+					}
 				} else {
 					inc["page_fail"] = 1
 				}
-			} else {
-				inc["page_fail"] = 1
+			} else if s.PageTwoTextHash != "" {
+				inc["page_onefail"] = 1
 			}
-		} else if s.PageTwoTextHash != "" {
-			inc["page_onefail"] = 1
 		}
 		if downloadAllNum > 0 {
 			rate := float64(downloadAllNum-repeatAllNum) / float64(downloadAllNum)
@@ -425,12 +417,12 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 				} else {
 					inc["oh_percent"] = 1
 				}
-			} else if rate >= 0.9 {
-				inc["nt_percent"] = 1
-			} else if rate >= 0.8 {
-				inc["et_percent"] = 1
-			} else {
-				inc["other_percent"] = 1
+				//} else if rate >= 0.9 {
+				//	inc["nt_percent"] = 1
+				//} else if rate >= 0.8 {
+				//	inc["et_percent"] = 1
+				//} else {
+				//	inc["other_percent"] = 1
 			}
 			if isRunRepeatList && start > max { //consecutive paging exceeded the limit
 				if !s.ContinueDownListChildTask {
@@ -442,8 +434,173 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 		} else {
 			inc["zero"] = 1
 		}
-		if s.RecordedHeartInfo {
-			set["lastgetdatatime"] = time.Now().Unix() //record the time data was last collected in a round
+		query := map[string]interface{}{
+			"date":       sDate,
+			"spidercode": s.Code,
+		}
+		coll := "spider_downloadrate"
+		if s.ContinueDownListChildTask {
+			coll = "spider_downloadrate_child"
+		}
+		MgoS.Update(coll, query, map[string]interface{}{
+			"$set": set,
+			"$inc": inc,
+		}, true, false)
+	}
+	//reset state
+	s.PageOneTextHash = ""
+	s.PageTwoTextHash = ""
+	return errs
+}
+
+//download list pages concurrently
+func (s *Spider) DownListPageItemByThreads() (errs interface{}) {
+	defer qu.Catch()
+	start, max := s.GetIntVar("spiderStartPage"), s.GetIntVar("spiderMaxPage") //start page, max page
+	s.MaxPage = max                                                            //record the script-configured max page
+	repeatAllNum := int64(0)                                                   //total duplicates this round
+	downloadAllNum := int64(0)                                                 //total items collected this round
+	if util.Config.IsHistoryEvent && s.GetVar("spiderType") == "history" {     //node 7000: choose the max page by spider type
+		max = s.GetIntVar("spiderHistoryMaxPage") //history spiders use the spiderHistoryMaxPage setting
+	}
+	repeatPageTimesLimit := 1 //limit on consecutive duplicate pages (default 1: equivalent to for-loop paging, advancing at least one page)
+	isRunRepeatList := false  //whether to run consecutive list-page dedup
+	//decide whether to run consecutive-paging checks and adjust the max page
+	if !util.Config.IsHistoryEvent && util.Config.Modal == 1 && max > 1 && max < 101 { //all nodes except sequential-collection mode and history nodes dedup consecutive list pages
+		isRunRepeatList = true
+		repeatPageTimesLimit = util.Config.PageTurnInfo.RepeatPageTimesLimitW0
+		max = util.Config.PageTurnInfo.TurnPageMaxLimitW0 //high-performance mode max page
+		if util.Config.Working == 1 {                     //queue mode
+			repeatPageTimesLimit = util.Config.PageTurnInfo.RepeatPageTimesLimitW1 //consecutive dedup pages (3)
+			max = util.Config.PageTurnInfo.TurnPageMaxLimitW1                      //queue mode max page (50)
+		}
+	}
+	//child-task handling
+	if s.ContinueDownListChildTask {
+		start = util.Config.PageTurnInfo.TurnPageMaxLimitW0 + 1
+		max = util.Config.PageTurnInfo.TurnPageMaxLimitW0 + util.Config.PageTurnInfo.NextPageMaxLimitW0
+		if util.Config.Working == 1 { //队列模式
+			start = util.Config.PageTurnInfo.TurnPageMaxLimitW1 + 1
+			max = util.Config.PageTurnInfo.TurnPageMaxLimitW1 + util.Config.PageTurnInfo.NextPageMaxLimitW1
+		}
+	}
+	//create the concurrent Spider objects
+	spChan := make(chan *Spider, 1)
+	if isRunRepeatList { //in unlimited-paging mode, size spChan for concurrency
+		spChan = make(chan *Spider, repeatPageTimesLimit)                       //concurrency equals the consecutive-dedup page count
+		spChan <- s                                                             //put into the channel
+		NewSpiderByScript(repeatPageTimesLimit-1, s.Code, s.ScriptFile, spChan) //create the additional Spider objects
+	} else {
+		spChan <- s //put into the channel
+	}
+	endPage := 0 //end page
+	for ; start <= max && !s.Stop; start += repeatPageTimesLimit {
+		if !s.Stop && !s.ContinueDownListChildTask { //if the spider was taken offline while downloading detail pages, stop storing heartbeat info
+			UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "list", start == 1) //record the list-page heartbeat on all nodes
+		}
+		listWg := &sync.WaitGroup{}
+		isContinue := false //whether to keep collecting
+		//download list pages concurrently
+		for listThreadNum := 0; listThreadNum < repeatPageTimesLimit; listThreadNum++ {
+			pagenum := start + listThreadNum //the actual page number being collected
+			if pagenum > max {               //each batch starts repeatPageTimesLimit workers, which may overshoot max
+				break
+			}
+			spTmp := <-spChan //take an sp object from the channel
+			listWg.Add(1)
+			atomic.AddInt64(&ListAllThreadNum, 1)
+			endPage = pagenum + 1
+			go func(sp *Spider, pagenum int) {
+				defer func() {
+					spChan <- sp //return the sp object to the channel when done
+					listWg.Done()
+					atomic.AddInt64(&ListAllThreadNum, -1)
+				}()
+				//download one page of data
+				downnum, repeatnum := sp.DownListOnePage(pagenum)
+				//aggregate the download counts
+				atomic.AddInt64(&downloadAllNum, int64(downnum))
+				atomic.AddInt64(&repeatAllNum, int64(repeatnum))
+				if downnum > 0 {
+					if downnum-repeatnum > 0 { //this page has new data
+						isContinue = true
+					}
+					//with concurrent list collection the spider objects differ, so stats can only be gathered afterwards
+					if pagenum == 1 { //copy the page-1 content hash from the worker sp to s
+						s.PageOneTextHash = sp.PageOneTextHash
+					} else if pagenum == 2 { //copy the page-2 content hash from the worker sp to s
+						s.PageTwoTextHash = sp.PageTwoTextHash
+					}
+				}
+				//qu.Debug("page", pagenum, "collected:", downnum, repeatnum)
+			}(spTmp, pagenum)
+		}
+		listWg.Wait()
+		if !isContinue { //keep going only while some page in the batch had new data, up to the page limit
+			break
+		}
+	}
+	close(spChan) //close the channel and release resources
+	logger.Info(s.Code, "list-page collection summary this round:", s.ContinueDownListChildTask, downloadAllNum, repeatAllNum, endPage, s.Stop)
+	if !util.Config.IsHistoryEvent && !s.Stop { //non-history nodes record the download rate
+		nowTime := time.Now()
+		sDate := qu.FormatDate(&nowTime, qu.Date_Short_Layout)
+		set := map[string]interface{}{
+			"site":       s.Name,
+			"channel":    s.Channel,
+			"spidercode": s.Code,
+			"updatetime": nowTime.Unix(),
+			"event":      util.Config.Uploadevent,
+			"modifyuser": s.MUserName,
+			"maxpage":    s.MaxPage,
+			"runrate":    s.SpiderRunRate,
+			"endpage":    endPage,
+			"date":       sDate,
+		}
+		inc := map[string]interface{}{
+			"alltimes": 1,
+		}
+		//record whether page turning succeeded
+		if s.MaxPage > 1 { //for spiders whose max page is 1, list-page anomalies reflect the spider's health instead
+			if s.PageOneTextHash != "" {
+				if s.PageTwoTextHash != "" {
+					if s.PageOneTextHash != s.PageTwoTextHash {
+						inc["page_success"] = 1
+					} else {
+						inc["page_fail"] = 1
+					}
+				} else {
+					inc["page_fail"] = 1
+				}
+			} else if s.PageTwoTextHash != "" {
+				inc["page_onefail"] = 1
+			}
+		}
+		if downloadAllNum > 0 {
+			rate := float64(downloadAllNum-repeatAllNum) / float64(downloadAllNum)
+			rate, _ = strconv.ParseFloat(fmt.Sprintf("%.2f", rate), 64)
+			if rate == 1.0 {
+				if downloadAllNum == 1 { //filtering left only a single new item on the list pages
+					inc["oh_percent_onenum"] = 1
+				} else {
+					inc["oh_percent"] = 1
+				}
+				//} else if rate >= 0.9 {
+				//	inc["nt_percent"] = 1
+				//} else if rate >= 0.8 {
+				//	inc["et_percent"] = 1
+				//} else {
+				//	inc["other_percent"] = 1
+			}
+			if isRunRepeatList && endPage > max { //consecutive paging exceeded the limit
+				if !s.ContinueDownListChildTask {
+					go ContinueDownListPageItem(s) //start a child task to continue collecting
+				} else {
+					inc["uplimit"] = 1
+				}
+			}
+		} else {
+			inc["zero"] = 1
 		}
 		query := map[string]interface{}{
 			"date":       sDate,
@@ -459,22 +616,73 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 		}, true, false)
 	}
 	//reset state
-	s.RecordedHeartInfo = false
 	s.PageOneTextHash = ""
 	s.PageTwoTextHash = ""
 	return errs
 }
 
+//download a single page of data
+func (s *Spider) DownListOnePage(pagenum int) (downnum, repeatnum int) {
+	defer qu.Catch()
+	downtimes := 0
+	for downtimes < 3 { //retry up to 3 times on error
+		if err := s.L.CallByParam(lua.P{
+			Fn:      s.L.GetGlobal("downloadAndParseListPage"),
+			NRet:    1,
+			Protect: true,
+		}, lua.LNumber(pagenum)); err != nil {
+			//panic(s.Code + "," + err.Error())
+			logger.Error("list-page collection error", pagenum, s.Code+","+err.Error())
+			atomic.AddInt32(&s.Script.ErrorNum, 1)
+			//retry after a list-page collection error
+			downtimes++
+			continue
+		}
+		lv := s.L.Get(-1)
+		s.L.Pop(1)
+		if tbl, ok := lv.(*lua.LTable); ok {
+			//list := []map[string]interface{}{}
+			//qu.Debug("items on the current page:", tbl.Len())
+			if tabLen := tbl.Len(); tabLen > 0 { //the list page has data; download detail pages from the list info
+				repeatListNum := 0 // number of duplicate links on the current list page
+				for i := 1; i <= tabLen; i++ {
+					v := tbl.RawGetInt(i).(*lua.LTable)
+					tmp := util.TableToMap(v)
+					if !s.IsHistoricalMend { //not historical backfill
+						tmp["dataging"] = 0 //mark the data with dataging=0
+						if s.DownDetail {
+							s.DownloadDetailItem(tmp, &repeatListNum)
+						}
+					} else { //historical backfill
+						s.HistoricalMendDownloadDetailItem(tmp) //historical backfill: download detail pages
+					}
+				}
+				repeatnum = repeatListNum
+				downnum = tabLen
+				return
+				//if !s.IsHistoricalMend && !s.DownDetail {
+				//	if len(list) > 0 { //save the info to the DB
+				//		StoreBlak(s.StoreMode, s.StoreToMsgEvent, s.Collection, s.CoverAttr, list)
+				//	}
+				//}
+			} else { //the page may be empty due to network problems; re-request the list page
+				downtimes++
+				continue
+			}
+		} else { //request for the current list page failed
+			downtimes++
+			continue
+		}
+	}
+	return
+}
+
 //start a separate goroutine to continue collecting list pages
 func ContinueDownListPageItem(s *Spider) {
 	defer qu.Catch()
 	spTmp, errstr := CreateSpider(s.SCode, s.ScriptFile, true, true) //create a new spider
 	logger.Info(s.SCode, "supplementary consecutive paging started...")
 	if errstr == "" && spTmp != nil && spTmp.Code != "nil" { //script loaded successfully
-		spTmp.UserName = s.UserName
-		spTmp.UserEmail = s.UserEmail
-		spTmp.MUserName = s.MUserName
-		spTmp.MUserEmail = s.MUserEmail
 		spTmp.ContinueDownListChildTask = true
 		defer spTmp.L.Close()
 		err := spTmp.DownListPageItem() //download the list pages
@@ -485,25 +693,6 @@ func ContinueDownListPageItem(s *Spider) {
 	}
 }
 
-//site info statistics
-//func (s *Spider) ThisSiteData(tmp map[string]interface{}) {
-//	defer qu.Catch()
-//	href := qu.ObjToString(tmp["href"])
-//	url_dn := RepDomainNameReg.ReplaceAllString(DomainNameReg.FindString(s.TargetChannelUrl), "")
-//	href_dn := RepDomainNameReg.ReplaceAllString(DomainNameReg.FindString(href), "")
-//	if url_dn != href_dn {
-//		SaveMgoCache <- map[string]interface{}{
-//			"site":       s.Name,
-//			"channel":    s.Channel,
-//			"spidercode": s.Code,
-//			"url":        s.TargetChannelUrl,
-//			"href":       href,
-//			"modifyuser": s.MUserName,
-//			"comeintime": time.Now().Unix(),
-//		}
-//	}
-//}
-
 //iterate and start detail-page downloads (historical backfill)
 func (s *Spider) HistoricalMendDownloadDetailItem(p interface{}) {
 	//qu.Debug("-------------- historical download -----------------")
@@ -916,9 +1105,10 @@ func (s *Spider) DownloadDetail(reload bool, isHistory bool) {
 			spChan := make(chan *Spider, threadNum+1) //initialize the thread channel (+1 for the base thread)
 			if threadNum > 1 {                        //initialize multiple sp objects
 				if !isHistory {
+					//fetch the spider info from LoopListPath so new spider objects always use the latest info (listing a spider updates its entry in LoopListPath)
 					if v, ok := LoopListPath.Load(s.Code); ok && v != nil {
 						if info, ok := v.(map[string]string); ok {
-							NewSpiderByScript(threadNum, s.Code, info, spChan)
+							NewSpiderByScript(threadNum, s.Code, info["script"], spChan)
 						} else {
 							logger.Debug("LoopListPath Not Has Code:", s.Code)
 							spChan = make(chan *Spider, 1) //cannot create other sp objects; use only the main thread's sp
@@ -928,14 +1118,7 @@ func (s *Spider) DownloadDetail(reload bool, isHistory bool) {
 						spChan = make(chan *Spider, 1) //cannot create other sp objects; use only the main thread's sp
 					}
 				} else {
-					info := map[string]string{
-						"script":          s.ScriptFile,
-						"createuser":      s.UserName,
-						"createuseremail": s.UserEmail,
-						"modifyuser":      s.MUserName,
-						"modifyemail":     s.MUserEmail,
-					}
-					NewSpiderByScript(threadNum, s.Code, info, spChan)
+					NewSpiderByScript(threadNum, s.Code, s.ScriptFile, spChan)
 				}
 			}
 			spChan <- s //put the main thread's sp into the channel
@@ -1108,14 +1291,10 @@ func (s *Spider) DownloadDetail(reload bool, isHistory bool) {
 }
 
 //initialize sp objects
-func NewSpiderByScript(num int, code string, info map[string]string, spChan chan *Spider) {
+func NewSpiderByScript(num int, code, script string, spChan chan *Spider) {
 	for i := 1; i <= num; i++ {
-		spTmp, errstr := CreateSpider(code, info["script"], true, true)
+		spTmp, errstr := CreateSpider(code, script, true, true)
 		if errstr == "" && spTmp != nil { //script loaded successfully
-			spTmp.UserName = info["createuser"]
-			spTmp.UserEmail = info["createuseremail"]
-			spTmp.MUserName = info["modifyuser"]
-			spTmp.MUserEmail = info["modifyemail"]
 			spChan <- spTmp
 		} else {
 			spChan <- nil
@@ -1133,7 +1312,6 @@ func AnalysisProjectInfo(data map[string]interface{}) string {
 	if detail == "详情请访问原网页!" || detail == "<br/>详情请访问原网页!" { //no substring check: some data is JSON-concatenated with incomplete fields and appends the "see the original page" notice
 		if projectinfo, ok := data["projectinfo"].(map[string]interface{}); ok && len(projectinfo) > 0 {
 			if attachments, ok := projectinfo["attachments"].(map[string]interface{}); ok && len(attachments) > 0 {
-				fileOk := false
 				for _, data := range attachments {
 					if d, ok := data.(map[string]interface{}); ok {
 						fid := qu.ObjToString(d["fid"])
@@ -1142,9 +1320,7 @@ func AnalysisProjectInfo(data map[string]interface{}) string {
 						}
 					}
 				}
-				if !fileOk {
-					return "detail_file"
-				}
+				return "detail_file"
 			} else {
 				return "detail_file"
 			}
@@ -1157,6 +1333,7 @@ func AnalysisProjectInfo(data map[string]interface{}) string {
 
 //log thread counts
 func AllThreadLog() {
+	logger.Info("List Download All Thread:", ListAllThreadNum)
 	logger.Info("Detail Download All Thread:", AllThreadNum)
 	time.AfterFunc(1*time.Minute, AllThreadLog)
 }
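
Taken together, DownListPageItemByThreads is the core of this commit: a buffered channel of reusable *Spider objects serves as a worker pool sized by the consecutive-dedup limit, pages are fetched in batches of that size, and paging stops as soon as an entire batch yields no new items (or the page cap is hit, at which point a child task may take over). A self-contained sketch of that pattern under simplified assumptions — a plain fetchPage function stands in for the Lua downloadAndParseListPage call, and empty-struct tokens stand in for the pooled *Spider objects:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// crawl pages 1..maxPage in batches of `workers`, stopping early once a
// whole batch produces nothing new, mirroring the isContinue check above.
func crawl(maxPage, workers int, fetchPage func(page int) (down, repeat int)) (downloadAll, repeatAll int64) {
	pool := make(chan struct{}, workers) // worker pool, like spChan
	for i := 0; i < workers; i++ {
		pool <- struct{}{}
	}
	for start := 1; start <= maxPage; start += workers {
		var wg sync.WaitGroup
		var newItems int64
		for t := 0; t < workers && start+t <= maxPage; t++ {
			token := <-pool // borrow a worker slot
			wg.Add(1)
			go func(page int) {
				defer func() { pool <- token; wg.Done() }() // return the slot
				down, repeat := fetchPage(page)
				atomic.AddInt64(&downloadAll, int64(down))
				atomic.AddInt64(&repeatAll, int64(repeat))
				atomic.AddInt64(&newItems, int64(down-repeat))
			}(start + t)
		}
		wg.Wait()
		if atomic.LoadInt64(&newItems) == 0 { // entire batch was duplicates: stop paging
			break
		}
	}
	return
}

func main() {
	down, rep := crawl(100, 3, func(page int) (int, int) {
		if page > 7 { // in this toy example, pages past 7 are fully duplicated
			return 10, 10
		}
		return 10, 2
	})
	fmt.Println("downloaded:", down, "repeated:", rep) // downloaded: 120 repeated: 64
}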