Browse Source

历史节点模式调整;部分无效代码删除

maxiaoshan 1 year ago
parent
commit
ae43397493
4 changed files with 4 additions and 147 deletions
  1. src/main.go (+1, -5)
  2. src/spider/history.go (+3, -21)
  3. src/spider/script.go (+0, -42)
  4. src/spider/store.go (+0, -79)

+ 1 - 5
src/main.go

@@ -113,11 +113,9 @@ func InitMgo() {
 func main() {
 	//临时统计总的线程数
 	go spider.AllThreadLog()
-	//定时上传流量信息
-	//go spider.TimeTask()
 	//定时清理日志
 	go clearLogs()
-	//初始化爬虫服务
+	//初始化爬虫服务,开始下载列表
 	go spider.InitSpider()
 	//清理计数
 	go spider.GcCount()
@@ -140,8 +138,6 @@ func main() {
 	//go spider.UpdateErrDataMgo()
 	//爬虫信息提交编辑器
 	//go spider.SpiderInfoSend()
-	//保存爬虫采集非本站点数据
-	//go spider.SaveOtherSiteData()
 	logger.Debug(Config.Webport)
 	xweb.Run(":" + Config.Webport)
 }

+ 3 - 21
src/spider/history.go

@@ -1,7 +1,6 @@
 package spider
 
 import (
-	"fmt"
 	"github.com/donnie4w/go-logger/logger"
 	qu "qfw/util"
 	sputil "spiderutil"
@@ -26,9 +25,8 @@ func (s *Spider) StartSpider() {
 	for {
 		logger.Info("Running Code:", s.Code, "Stop:", s.Stop)
 		if !s.Stop { //爬虫是运行状态
-			//s.DownloadHistoryDetail()
 			s.DownloadDetail(true, true)
-		} else { //爬虫停止运行,删除
+		} else { //爬虫停止运行,删除(爬虫的下架依赖是否还有需要下载的数据,无则下架)
 			s.L.Close()
 			HistoryAllSpiders.Delete(s.Code)
 			logger.Info("Delete Code:", s.Code, "Stop:", s.Stop)
@@ -82,29 +80,13 @@ func GetHistoryDownloadSpider() {
 						false, true,
 					)
 				} else {
-					old := qu.IntAll((*lua)["old_lua"])
-					script := ""
-					if old == 1 {
-						script = fmt.Sprint((*lua)["luacontent"])
-					} else {
-						if (*lua)["oldlua"] != nil {
-							if (*lua)["luacontent"] != nil {
-								script = (*lua)["luacontent"].(string)
-							}
-						} else {
-							script = GetScriptByTmp((*lua))
-						}
-					}
+					script := GetScriptByTmp((*lua))
 					spTmp, b := HistoryAllSpiders.Load(code)
 					isNew := true
 					if b { //更新正在运行爬虫信息
 						sp, ok := spTmp.(*Spider)
 						if ok {
 							sp.ScriptFile = script
-							sp.UserName = qu.ObjToString((*lua)["createuser"])
-							sp.UserEmail = qu.ObjToString((*lua)["createuseremail"])
-							sp.MUserName = qu.ObjToString((*lua)["modifyuser"])
-							sp.MUserEmail = qu.ObjToString((*lua)["next"])
 							isNew = false
 						}
 					}
@@ -150,5 +132,5 @@ func GetHistoryDownloadSpider() {
 	}
 	wg.Wait()
 	//time.AfterFunc(time.Second*30, GetHistoryDownloadSpider)
-	time.AfterFunc(time.Minute*10, GetHistoryDownloadSpider)
+	time.AfterFunc(time.Minute*2, GetHistoryDownloadSpider)
 }

+ 0 - 42
src/spider/script.go

@@ -140,27 +140,6 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 		}
 		var retLen int64
 		ret := Download(&retLen, s.Downloader, url, "get", util.GetTable(head), charset, s.Userproxy, ishttps, s.SCode, s.Timeout)
-		//流量统计
-		//if retLen > 0 {
-		//	key := Today + "+" + code
-		//	if sf, ok := SpiderFlowMap.Load(key); ok && sf != nil {
-		//		if sfMap, ok := sf.(*SpiderFlow); ok {
-		//			sfMap.Flow += retLen
-		//			//sfMap.Site = *site
-		//			//sfMap.Channel = *channel
-		//			//sfMap.ModifyUser = *user
-		//			SpiderFlowMap.Store(key, sfMap)
-		//		}
-		//	} else {
-		//		SpiderFlowMap.Store(key, &SpiderFlow{
-		//			//Code:       code,
-		//			Site:       *site,
-		//			Channel:    *channel,
-		//			Flow:       retLen,
-		//			ModifyUser: *user,
-		//		})
-		//	}
-		//}
 		S.Push(lua.LString(ret))
 		atomic.AddInt32(&s.ToDayRequestNum, 1)
 		atomic.AddInt32(&s.TotalRequestNum, 1)
@@ -202,27 +181,6 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 		} else {
 			ret, retcookie, headers = DownloadAdv(&retLen, s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, charset, s.Userproxy, ishttps, s.SCode, s.Timeout)
 		}
-		//流量统计
-		//if retLen > 0 {
-		//	key := Today + "+" + code
-		//	if sf, ok := SpiderFlowMap.Load(key); ok && sf != nil {
-		//		if sfMap, ok := sf.(*SpiderFlow); ok {
-		//			sfMap.Flow += retLen
-		//			//sfMap.Site = *site
-		//			//sfMap.Channel = *channel
-		//			//sfMap.ModifyUser = *user
-		//			SpiderFlowMap.Store(key, sfMap)
-		//		}
-		//	} else {
-		//		SpiderFlowMap.Store(key, &SpiderFlow{
-		//			//Code:       code,
-		//			Site:       *site,
-		//			Channel:    *channel,
-		//			Flow:       retLen,
-		//			ModifyUser: *user,
-		//		})
-		//	}
-		//}
 		S.Push(lua.LString(ret))
 		scookie, _ := json.Marshal(retcookie)
 		S.Push(lua.LString(scookie))

+ 0 - 79
src/spider/store.go

@@ -438,82 +438,3 @@ func SaveDataBak() {
 //		//Mgo.Update("regatherdata", "spider", "spider", query, set, true, false)
 //	}
 //}
-
-//保存爬虫采集非本站点数据
-//func SaveOtherSiteData() {
-//	fmt.Println("Save Other Site Data...")
-//	savearr := make([]map[string]interface{}, 200)
-//	indexh := 0
-//	for {
-//		select {
-//		case v := <-SaveMgoCache:
-//			savearr[indexh] = v
-//			indexh++
-//			if indexh == 200 {
-//				SPS <- true
-//				go func(savearr []map[string]interface{}) {
-//					defer func() {
-//						<-SPS
-//					}()
-//					MgoS.SaveBulk("spider_othersite", savearr...)
-//				}(savearr)
-//				savearr = make([]map[string]interface{}, 200)
-//				indexh = 0
-//			}
-//		case <-time.After(1 * time.Minute):
-//			if indexh > 0 {
-//				SPS <- true
-//				go func(savearr []map[string]interface{}) {
-//					defer func() {
-//						<-SPS
-//					}()
-//					MgoS.SaveBulk("spider_othersite", savearr...)
-//				}(savearr[:indexh])
-//				savearr = make([]map[string]interface{}, 200)
-//				indexh = 0
-//			}
-//		}
-//	}
-//}
-
-//定时任务
-//func TimeTask() {
-//	now := time.Now()
-//	Today = util.FormatDate(&now, util.Date_Short_Layout) //初始化日期
-//	cr := cron.New()
-//	cr.Start()
-//	cr.AddFunc("0 30 0 * * ?", UpdateSpiderFlow) //每天零时提交统计
-//}
-
-//更新流量信息
-//func UpdateSpiderFlow() {
-//	defer util.Catch()
-//	logger.Info("统计流量信息开始...", Today)
-//	arr := []map[string]interface{}{}
-//	SpiderFlowMap.Range(func(key, temp interface{}) bool {
-//		date := strings.Split(key.(string), "+")
-//		if len(date) == 2 && date[0] == Today { //统计非当天的
-//			if sfMap, ok := temp.(*SpiderFlow); ok {
-//				arr = append(arr, map[string]interface{}{
-//					"spidercode": date[1],
-//					"date":       date[0],
-//					"flow":       sfMap.Flow,
-//					"site":       sfMap.Site,
-//					"channel":    sfMap.Channel,
-//					"modifyuser": sfMap.ModifyUser,
-//					"comeintime": time.Now().Unix(),
-//					"event":      lu.Config.Uploadevent,
-//				})
-//				SpiderFlowMap.Delete(key) //统计完成后删除非当天数据
-//			}
-//		}
-//		return true
-//	})
-//	if len(arr) > 0 {
-//		MgoS.SaveBulk("spider_flow", arr...)
-//		arr = []map[string]interface{}{}
-//	}
-//	now := time.Now()
-//	Today = util.FormatDate(&now, util.Date_Short_Layout)
-//	logger.Info("统计流量信息完成...", Today)
-//}