maxiaoshan committed 3 years ago
parent
commit
6ca698a193
5 changed files with 209 additions and 77 deletions
  1. src/main.go (+2 -0)
  2. src/spider/download.go (+14 -7)
  3. src/spider/script.go (+117 -51)
  4. src/spider/spider.go (+30 -16)
  5. src/spider/store.go (+46 -3)

+ 2 - 0
src/main.go

@@ -103,6 +103,8 @@ func initConfig(addr, alias, db string, dbsize int) {
 
 //
 func main() {
+	// periodically upload traffic stats
+	go spider.TimeTask()
 	// periodically clean old logs
 	go clearLogs()
 	// initialize the spider service

+ 14 - 7
src/spider/download.go

@@ -27,7 +27,7 @@ func init() {
 }
 
 //download the page: send the request message and wait for the result
-func Download(downloaderid, url, method string, head map[string]interface{}, encoding string, useproxy, ishttps bool, code string, timeout int64) string {
+func Download(retLen *int64, downloaderid, url, method string, head map[string]interface{}, encoding string, useproxy, ishttps bool, code string, timeout int64) string {
 	defer mu.Catch()
 	msgid := mu.UUID(8)
 	if len(head) < 1 {
@@ -61,6 +61,8 @@ func Download(downloaderid, url, method string, head map[string]interface{}, enc
 			return ""
 		}
 	}
+	retLenTmp := int64(len(ret))
+	*retLen = retLenTmp
 	if err != nil {
 		str := code + "方法Download,url:" + url + ",err:" + err.Error()
 		logger.Error(str, timeout)
@@ -82,7 +84,7 @@ func Download(downloaderid, url, method string, head map[string]interface{}, enc
 }
 
 //download the page: send the request message and wait for the result
-func DownloadAdv(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) (string, []*http.Cookie) {
+func DownloadAdv(retLen *int64, downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) (string, []*http.Cookie, map[string]interface{}) {
 	defer mu.Catch()
 	msgid := mu.UUID(8)
 	if len(head) < 1 {
@@ -117,9 +119,11 @@ func DownloadAdv(downloaderid, url, method string, reqparam, head map[string]int
 				"ishttps":  ishttps,
 			}, timeout)
 		} else {
-			return "", nil
+			return "", nil, nil
 		}
 	}
+	retLenTmp := int64(len(ret))
+	*retLen = retLenTmp
 	if err != nil {
 		str := code + "方法DownloadAdv,url:" + url + ",err:" + err.Error()
 		logger.Error(str, timeout)
@@ -127,16 +131,17 @@ func DownloadAdv(downloaderid, url, method string, reqparam, head map[string]int
 	tmp := map[string]interface{}{}
 	json.Unmarshal(ret, &tmp)
 	cooks := lu.ParseHttpCookie(tmp["cookie"])
+	headers, _ := tmp["header"].(map[string]interface{})
 	if v, ok := tmp["code"].(string); ok && v == "200" {
 		if isImg {
 			bs, _ := tmp["content"].(string)
-			return string(bs), cooks
+			return string(bs), cooks, headers
 		} else {
 			bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
-			return string(bs), cooks
+			return string(bs), cooks, headers
 		}
 	} else {
-		return "", nil
+		return "", nil, nil
 	}
 }
 
@@ -193,7 +198,7 @@ func DownloadFile_bak(downloaderid, url, method string, reqparam, head map[strin
 	}
 }
 
-func DownloadFile(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {
+func DownloadFile(retLen *int64, downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {
 	defer mu.Catch()
 	timeout = timeout * 2
 	msgid := mu.UUID(8)
@@ -231,6 +236,8 @@ func DownloadFile(downloaderid, url, method string, reqparam, head map[string]in
 			return nil
 		}
 	}
+	retLenTmp := int64(len(ret))
+	*retLen = retLenTmp
 	if err != nil {
 		str := code + "方法DownloadFile,url:" + url + ",err:" + err.Error()
 		logger.Error(str, timeout)

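Note on the new retLen out-parameter: all three downloaders now report the payload size back to the Lua bindings through it, so traffic can be accounted per spider. The Go subtlety the hunks above rely on is that writing through the pointer (*retLen = v) updates the caller's variable, while rebinding the parameter (retLen = &v) only changes the local copy and is lost on return. A minimal standalone sketch of the pattern, with hypothetical names:

package main

import "fmt"

// fetch mimics the Download/DownloadAdv signatures: it returns the body
// and reports its size through the retLen out-parameter.
func fetch(retLen *int64) string {
	body := "payload"
	*retLen = int64(len(body)) // write through the pointer: visible to the caller
	// retLen = new(int64)     // rebinding the parameter would be lost on return
	return body
}

func main() {
	var n int64
	body := fetch(&n)
	fmt.Println(body, n) // payload 7
}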
+ 117 - 51
src/spider/script.go

@@ -119,8 +119,28 @@ func (s *Script) LoadScript(site, channel, user, code, script_file string, newst
 		if charset == "" {
 			charset = s.Encoding
 		}
-		ret := Download(s.Downloader, url, "get", util.GetTable(head), charset, s.Userproxy, ishttps, s.SCode, s.Timeout)
-
+		var retLen int64
+		ret := Download(&retLen, s.Downloader, url, "get", util.GetTable(head), charset, s.Userproxy, ishttps, s.SCode, s.Timeout)
+		if retLen > 0 {
+			key := Today + "+" + code
+			if sf, ok := SpiderFlowMap.Load(key); ok && sf != nil {
+				if sfMap, ok := sf.(*SpiderFlow); ok {
+					sfMap.Flow += retLen
+					sfMap.Site = site
+					sfMap.Channel = channel
+					sfMap.ModifyUser = user
+					SpiderFlowMap.Store(key, sfMap)
+				}
+			} else {
+				SpiderFlowMap.Store(key, &SpiderFlow{
+					//Code:       code,
+					Site:       site,
+					Channel:    channel,
+					Flow:       retLen,
+					ModifyUser: user,
+				})
+			}
+		}
 		S.Push(lua.LString(ret))
 		atomic.AddInt32(&s.ToDayRequestNum, 1)
 		atomic.AddInt32(&s.TotalRequestNum, 1)
@@ -154,15 +174,39 @@ func (s *Script) LoadScript(site, channel, user, code, script_file string, newst
 		json.Unmarshal([]byte(cookie), &mycookie)
 		var ret string
 		var retcookie []*http.Cookie
+		var headers = map[string]interface{}{}
+		var retLen int64
 		if param == nil {
 			ptext := map[string]interface{}{"text": S.ToString(-3)}
-			ret, retcookie = DownloadAdv(s.Downloader, url, method, ptext, util.GetTable(head), mycookie, charset, s.Userproxy, ishttps, s.SCode, s.Timeout)
+			ret, retcookie, headers = DownloadAdv(&retLen, s.Downloader, url, method, ptext, util.GetTable(head), mycookie, charset, s.Userproxy, ishttps, s.SCode, s.Timeout)
 		} else {
-			ret, retcookie = DownloadAdv(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, charset, s.Userproxy, ishttps, s.SCode, s.Timeout)
+			ret, retcookie, headers = DownloadAdv(&retLen, s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, charset, s.Userproxy, ishttps, s.SCode, s.Timeout)
+		}
+		if retLen > 0 {
+			key := Today + "+" + code
+			if sf, ok := SpiderFlowMap.Load(key); ok && sf != nil {
+				if sfMap, ok := sf.(*SpiderFlow); ok {
+					sfMap.Flow += retLen
+					sfMap.Site = site
+					sfMap.Channel = channel
+					sfMap.ModifyUser = user
+					SpiderFlowMap.Store(key, sfMap)
+				}
+			} else {
+				SpiderFlowMap.Store(key, &SpiderFlow{
+					//Code:       code,
+					Site:       site,
+					Channel:    channel,
+					Flow:       retLen,
+					ModifyUser: user,
+				})
+			}
 		}
 		S.Push(lua.LString(ret))
 		scookie, _ := json.Marshal(retcookie)
 		S.Push(lua.LString(scookie))
+		hTable := util.MapToLuaTable(S, headers)
+		S.Push(hTable)
 		atomic.AddInt32(&s.ToDayRequestNum, 1)
 		atomic.AddInt32(&s.TotalRequestNum, 1)
 		end := time.Since(start)
@@ -170,7 +214,75 @@ func (s *Script) LoadScript(site, channel, user, code, script_file string, newst
 			s.LastThreeTimes = s.LastThreeTimes[1:]
 		}
 		s.LastThreeTimes = append(s.LastThreeTimes, end)
-		return 2
+		return 3
+	}))
+	// download an attachment: downloadFile(url,method,param,head,cookie,fileName)
+	s.L.SetGlobal("downloadFile", s.L.NewFunction(func(S *lua.LState) int {
+		if s.FileLastThreeTimes == nil {
+			s.FileLastThreeTimes = make([]time.Duration, 4)
+		}
+		if util.Config.IsDelay {
+			SleepTime(3, s.FileLastThreeTimes) // sleep interval
+		}
+		start := time.Now() // start time
+		cookie := S.ToString(-1)
+		head := S.ToTable(-2)
+		param := S.ToTable(-3)
+		method := S.ToString(-4)
+		url := S.ToString(-5)
+		fileName := S.ToString(-6)
+		ishttps := strings.Contains(url, "https")
+		var mycookie []*http.Cookie
+		if cookie != "{}" {
+			json.Unmarshal([]byte(cookie), &mycookie)
+		} else {
+			mycookie = make([]*http.Cookie, 0)
+		}
+		fileName = strings.TrimSpace(fileName)
+		url = strings.TrimSpace(url)
+		var retLen int64
+		ret := DownloadFile(&retLen, s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
+		if retLen > 0 {
+			key := Today + "+" + code
+			if sf, ok := SpiderFlowMap.Load(key); ok && sf != nil {
+				if sfMap, ok := sf.(*SpiderFlow); ok {
+					sfMap.Flow += retLen
+					sfMap.Site = site
+					sfMap.Channel = channel
+					sfMap.ModifyUser = user
+					SpiderFlowMap.Store(key, sfMap)
+				}
+			} else {
+				SpiderFlowMap.Store(key, &SpiderFlow{
+					//Code:       code,
+					Site:       site,
+					Channel:    channel,
+					Flow:       retLen,
+					ModifyUser: user,
+				})
+			}
+		}
+
+		url, name, size, ftype, fid := util.UploadFile(s.SCode, fileName, url, ret)
+		if strings.TrimSpace(ftype) == "" {
+			if len(path.Ext(name)) > 0 {
+				ftype = path.Ext(name)[1:]
+			}
+		}
+		S.Push(lua.LString(url))
+		S.Push(lua.LString(name))
+		S.Push(lua.LString(size))
+		S.Push(lua.LString(ftype))
+		S.Push(lua.LString(fid))
+		atomic.AddInt32(&s.ToDayRequestNum, 1)
+		atomic.AddInt32(&s.TotalRequestNum, 1)
+
+		end := time.Since(start)
+		if len(s.FileLastThreeTimes) >= 4 {
+			s.FileLastThreeTimes = s.FileLastThreeTimes[1:]
+		}
+		s.FileLastThreeTimes = append(s.FileLastThreeTimes, end)
+		return 5
 	}))
 	// save validation error logs
 	s.L.SetGlobal("saveErrLog", s.L.NewFunction(func(S *lua.LState) int {
@@ -521,52 +633,6 @@ func (s *Script) LoadScript(site, channel, user, code, script_file string, newst
 		return 1
 	}))
 
-	// download an attachment: download(url,method,param,head,cookie,fileName)
-	s.L.SetGlobal("downloadFile", s.L.NewFunction(func(S *lua.LState) int {
-		if s.FileLastThreeTimes == nil {
-			s.FileLastThreeTimes = make([]time.Duration, 4)
-		}
-		if util.Config.IsDelay {
-			SleepTime(3, s.FileLastThreeTimes) // sleep interval
-		}
-		start := time.Now() // start time
-		cookie := S.ToString(-1)
-		head := S.ToTable(-2)
-		param := S.ToTable(-3)
-		method := S.ToString(-4)
-		url := S.ToString(-5)
-		fileName := S.ToString(-6)
-		ishttps := strings.Contains(url, "https")
-		var mycookie []*http.Cookie
-		if cookie != "{}" {
-			json.Unmarshal([]byte(cookie), &mycookie)
-		} else {
-			mycookie = make([]*http.Cookie, 0)
-		}
-		fileName = strings.TrimSpace(fileName)
-		url = strings.TrimSpace(url)
-		ret := DownloadFile(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
-		url, name, size, ftype, fid := util.UploadFile(s.SCode, fileName, url, ret)
-		if strings.TrimSpace(ftype) == "" {
-			if len(path.Ext(name)) > 0 {
-				ftype = path.Ext(name)[1:]
-			}
-		}
-		S.Push(lua.LString(url))
-		S.Push(lua.LString(name))
-		S.Push(lua.LString(size))
-		S.Push(lua.LString(ftype))
-		S.Push(lua.LString(fid))
-		atomic.AddInt32(&s.ToDayRequestNum, 1)
-		atomic.AddInt32(&s.TotalRequestNum, 1)
-
-		end := time.Since(start)
-		if len(s.FileLastThreeTimes) >= 4 {
-			s.FileLastThreeTimes = s.FileLastThreeTimes[1:]
-		}
-		s.FileLastThreeTimes = append(s.FileLastThreeTimes, end)
-		return 5
-	}))
 	s.L.SetGlobal("clearMemoeryCache", s.L.NewFunction(func(S *lua.LState) int {
 		/*title := S.ToString(-1)
 		isExist, _ := redis.Exists("title_repeat_judgement", "title_repeat_"+title)

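Note: the same load-or-create accounting block now appears verbatim in the download, downloadAdv, and downloadFile bindings. A hypothetical helper (not part of this commit) could collapse the three copies; it also serializes the read-modify-write with a mutex, since sync.Map only makes individual Load/Store calls safe, not the Flow += retLen update between them:

// flowMu serializes the read-modify-write on SpiderFlow values;
// an illustration-only addition, not present in this commit.
var flowMu sync.Mutex

// addSpiderFlow accumulates downloaded bytes under the "date+code" key,
// creating the day's entry on first use.
func addSpiderFlow(code, site, channel, user string, retLen int64) {
	if retLen <= 0 {
		return
	}
	key := Today + "+" + code
	flowMu.Lock()
	defer flowMu.Unlock()
	if v, ok := SpiderFlowMap.Load(key); ok {
		if sf, ok := v.(*SpiderFlow); ok {
			sf.Flow += retLen // the value is a pointer, so no re-Store is needed
			sf.Site, sf.Channel, sf.ModifyUser = site, channel, user
			return
		}
	}
	SpiderFlowMap.Store(key, &SpiderFlow{Site: site, Channel: channel, Flow: retLen, ModifyUser: user})
}

Each binding would then shrink to one addSpiderFlow(code, site, channel, user, retLen) call after its download. Separately, downloadAdv now pushes the response headers to Lua via util.MapToLuaTable, whose source is not in this diff; a plausible shape with gopher-lua (a sketch, assuming imports of fmt and lua "github.com/yuin/gopher-lua"):

// MapToLuaTable copies a Go map into a fresh Lua table; a guess at what
// the util helper does, not its actual implementation.
func MapToLuaTable(L *lua.LState, m map[string]interface{}) *lua.LTable {
	tbl := L.NewTable()
	for k, v := range m {
		switch val := v.(type) {
		case string:
			tbl.RawSetString(k, lua.LString(val))
		case float64:
			tbl.RawSetString(k, lua.LNumber(val))
		case bool:
			tbl.RawSetString(k, lua.LBool(val))
		default:
			tbl.RawSetString(k, lua.LString(fmt.Sprint(val)))
		}
	}
	return tbl
}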
+ 30 - 16
src/spider/spider.go

@@ -16,6 +16,7 @@ import (
 	qu "qfw/util"
 	mgu "qfw/util/mongodbutil"
 	"strconv"
+	"sync"
 
 	//mgu "qfw/util/mongodbutil"
 	//"qfw/util/redis"
@@ -31,6 +32,7 @@ import (
 	"github.com/yuin/gopher-lua"
 )
 
+// heartbeat bookkeeping
 type Heart struct {
 	DetailHeart        int64  //heartbeat: detail (third-level) page executed
 	DetailExecuteHeart int64  //heartbeat: data collected on the detail page
@@ -41,6 +43,15 @@ type Heart struct {
 	Channel            string //channel
 }
 
+// per-spider traffic accounting
+type SpiderFlow struct {
+	Flow       int64  //bytes downloaded
+	ModifyUser string //spider maintainer
+	Site       string //site
+	Channel    string //channel
+	//Code       string
+}
+
 //spider
 type Spider struct {
 	Script
@@ -93,6 +104,8 @@ var Reg = regexp.MustCompile(`(http|https)://([\w]+\.)+[\w]+(/?)`)
 var DomainNameReg = regexp.MustCompile(`(?://).+?(?:)[::/]`)
 var RepDomainNameReg = regexp.MustCompile(`[::/]+`)
 var DelaySites map[string]int // sites collected with a delay
+var Today string
+var SpiderFlowMap = sync.Map{} // key "yyyy-MM-dd+spidercode" -> *SpiderFlow
 
 // heartbeat
 func UpdateHeart(site, channel, code, user, t string) {
@@ -483,7 +496,7 @@ func (s *Spider) HistoricalMendDownloadDetailItem(p interface{}) {
 		// mark the spider_listdata record as successfully downloaded
 		if id != "" {
 			//Mgo.Update("spider_listdata", map[string]interface{}{"href": href}, map[string]interface{}{"$set": map[string]interface{}{"state": 1, "byid": id}}, false, true)
-			Mgo.UpdateById("spider_listdata", id, map[string]interface{}{"$set": map[string]interface{}{"state": 1}})
+			Mgo.UpdateById("spider_listdata", id, map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true, "updatetime": time.Now().Unix()}})
 		}
 		return
 	}
@@ -504,7 +517,7 @@ func (s *Spider) HistoricalMendDownloadDetailItem(p interface{}) {
 		return
 	}
 	// filter data at the detail page
-	set := map[string]interface{}{"state": 1}
+	set := map[string]interface{}{"state": 1, "updatetime": time.Now().Unix()}
 	if data["delete"] != nil {
 		// incremental
 		util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, href, 3600*24*365)
@@ -629,7 +642,7 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
 		}
 		// mark the spider_listdata record as failed to download
 		if id != "" {
-			Mgo.UpdateById("spider_listdata", id, map[string]interface{}{"$set": map[string]interface{}{"state": -1}})
+			Mgo.UpdateById("spider_listdata", id, map[string]interface{}{"$set": map[string]interface{}{"state": -1, "updatetime": time.Now().Unix()}})
 		}
 		return
 	} else if tmphref := qu.ObjToString(data["href"]); tmphref != href { // the detail page rewrote href, so it differs from the list page
@@ -648,7 +661,7 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
 	if !s.Stop {
 		UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detailexcute") // record the data-collected heartbeat for legacy mode (modal=0)
 	}
-	set := map[string]interface{}{"state": 1, "byid": id}
+	set := map[string]interface{}{"state": 1, "updatetime": time.Now().Unix(), "byid": id}
 	// filter data at the detail page
 	if data["delete"] != nil {
 		// incremental
@@ -837,7 +850,8 @@ func (s *Spider) DownloadHighDetail() {
 			if !s.Stop { // the spider was taken offline mid-download; stop storing heartbeats
 				UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail") // record the detail-page heartbeat (modal=1)
 			}
-			list, _ := Mgo.Find("spider_highlistdata", q, o, f, false, 0, 100)
+			list, _ := Mgo.Find("spider_highlistdata_test", q, o, f, false, 0, 100)
+			qu.Debug("----", len(*list))
 			if list != nil && len(*list) > 0 {
 				for _, tmp := range *list {
 					_id := tmp["_id"]
@@ -847,8 +861,8 @@ func (s *Spider) DownloadHighDetail() {
 					// incremental redis dedup to avoid repeated downloads
 					isExist, _ := util.ExistRedis("title_repeat_judgement", 0, "url_repeat_"+href)
 					if isExist {
-						set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true}} // already exists; set state to 1
-						Mgo.Update("spider_highlistdata", query, set, false, false)
+						set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true, "updatetime": time.Now().Unix()}} // already exists; set state to 1
+						Mgo.Update("spider_highlistdata_test", query, set, false, false)
 						continue
 					}
 					if isEsRepeat { // dedup the title against es
@@ -858,8 +872,8 @@ func (s *Spider) DownloadHighDetail() {
 						esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + fmt.Sprint(sTime) + `","lte": "` + fmt.Sprint(eTime) + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
 						count := Es.Count(EsIndex, EsType, esQuery)
 						if count > 0 { // the title is already in es; skip it and update the list-table state
-							set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true}} // already exists; set state to 1
-							Mgo.Update("spider_highlistdata", query, set, false, false)
+							set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true, "updatetime": time.Now().Unix()}} // already exists; set state to 1
+							Mgo.Update("spider_highlistdata_test", query, set, false, false)
 							util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, href, 3600*24*365)
 							continue
 						}
@@ -908,7 +922,7 @@ func (s *Spider) DownloadHighDetail() {
 							ss["state"] = -1
 						}
 						set := map[string]interface{}{"$set": ss}
-						Mgo.Update("spider_highlistdata", query, set, false, false)
+						Mgo.Update("spider_highlistdata_test", query, set, false, false)
 						continue
 					} else {
 						deleteData := FilterByDetail(href, query, data) // filter data that the list page could not catch and must be filtered at the detail page
@@ -937,8 +951,8 @@ func (s *Spider) DownloadHighDetail() {
 					data["dataging"] = 0
 					data["iscompete"] = s.IsCompete // spiders added after 2021-11-01 no longer show the original link (decided by the save service)
 					Store(s.StoreMode, s.StoreToMsgEvent, s.Collection, s.CoverAttr, data, true)
-					set := map[string]interface{}{"$set": map[string]interface{}{"state": 1}} // download succeeded; set state to 1
-					Mgo.Update("spider_highlistdata", query, set, false, false)
+					set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "updatetime": time.Now().Unix()}} // download succeeded; set state to 1
+					Mgo.Update("spider_highlistdata_test", query, set, false, false)
 				}
 				//重载spider
 				s.LoadScript(s.Name, s.Channel, s.MUserName, s.Code, s.ScriptFile, true)
@@ -997,7 +1011,7 @@ func (s *Spider) DownloadListDetail() {
 			// incremental redis dedup to avoid repeated downloads
 			isExist, _ := util.ExistRedis("title_repeat_judgement", 0, "url_repeat_"+href)
 			if isExist {
-				set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true}} // already exists; set state to 1
+				set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true, "updatetime": time.Now().Unix()}} // already exists; set state to 1
 				Mgo.Update("spider_highlistdata", query, set, false, false)
 				continue
 			}
@@ -1007,7 +1021,7 @@ func (s *Spider) DownloadListDetail() {
 				sTime := eTime - int64(7*86400)
 				esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + fmt.Sprint(sTime) + `","lte": "` + fmt.Sprint(eTime) + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
 				if Es.Count(EsIndex, EsType, esQuery) > 0 { // the title is already in es; skip it and update the list-table state
-					set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true}} // already exists; set state to 1
+					set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true, "updatetime": time.Now().Unix()}} // already exists; set state to 1
 					Mgo.Update("spider_highlistdata", query, set, false, false)
 					util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, href, 3600*24*365)
 					continue
@@ -1085,7 +1099,7 @@ func (s *Spider) DownloadListDetail() {
 			data["dataging"] = 0
 			data["iscompete"] = s.IsCompete // spiders added after 2021-11-01 no longer show the original link (decided by the save service)
 			Store(s.StoreMode, s.StoreToMsgEvent, s.Collection, s.CoverAttr, data, true)
-			set := map[string]interface{}{"$set": map[string]interface{}{"state": 1}} // download succeeded; set state to 1
+			set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "updatetime": time.Now().Unix()}} // download succeeded; set state to 1
 			Mgo.Update("spider_highlistdata", query, set, false, false)
 		}
 	}
@@ -1100,7 +1114,7 @@ func FilterByDetail(href string, query, data map[string]interface{}) bool {
 		hashHref := HexText(href)
 		util.PutRedis("title_repeat_fulljudgement", db, hashHref, "", -1)
 		// update mongo: mark the to-be-deleted spider_highlistdata rows state=1 so they are not downloaded again, then update redis
-		set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "delete": true}}
+		set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "delete": true, "updatetime": time.Now().Unix()}}
 		Mgo.Update("spider_highlistdata", query, set, false, false)
 		return true
 	}

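Note: nearly every status update in this file now writes the same $set document with an updatetime stamp. If the pattern keeps spreading, a small constructor (hypothetical, not in this commit; assumes the file's existing time import) would keep the stamping uniform:

// stateSet builds the "$set" update document used throughout this file,
// always stamping updatetime; extra carries flags such as "exist" or "delete".
func stateSet(state int, extra map[string]interface{}) map[string]interface{} {
	set := map[string]interface{}{
		"state":      state,
		"updatetime": time.Now().Unix(),
	}
	for k, v := range extra {
		set[k] = v
	}
	return map[string]interface{}{"$set": set}
}

Usage would read Mgo.Update("spider_highlistdata", query, stateSet(1, map[string]interface{}{"exist": true}), false, false).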
+ 46 - 3
src/spider/store.go

@@ -7,14 +7,15 @@ import (
 	"qfw/util"
 	mgu "qfw/util/mongodbutil"
 
+	"github.com/cron"
+	"github.com/donnie4w/go-logger/logger"
+	"github.com/yuin/gopher-lua"
+
 	//"qfw/util/redis"
 	lu "spiderutil"
 	"strings"
 	"sync/atomic"
 	"time"
-
-	"github.com/donnie4w/go-logger/logger"
-	"github.com/yuin/gopher-lua"
 )
 
 type LogMap struct {
@@ -261,6 +262,7 @@ func SaveListPageData(tmp map[string]interface{}, id *string, isEsRepeat bool) {
 	if isEsRepeat { // competitor-like data was deduped by es; update its state
 		tmp["state"] = 1
 		tmp["exist"] = true
+		tmp["updatetime"] = time.Now().Unix()
 	}
 	*id = Mgo.Save("spider_listdata", tmp)
 }
@@ -402,3 +404,44 @@ func SaveOtherSiteData() {
 		}
 	}
 }
+
+// scheduled tasks
+func TimeTask() {
+	now := time.Now()
+	Today = util.FormatDate(&now, util.Date_Short_Layout) // initialize the current date
+	cr := cron.New()
+	cr.Start()
+	cr.AddFunc("0 30 0 * * ?", UpdateSpiderFlow) // flush the stats at 00:30 every day
+}
+
+// flush the traffic stats to mongo
+func UpdateSpiderFlow() {
+	defer util.Catch()
+	now := time.Now()
+	Today = util.FormatDate(&now, util.Date_Short_Layout) // roll the date first so yesterday's entries qualify for flushing below
+	logger.Info("统计流量信息开始...", Today)
+	arr := []map[string]interface{}{}
+	SpiderFlowMap.Range(func(key, temp interface{}) bool {
+		date := strings.Split(key.(string), "+")
+		if len(date) == 2 && date[0] != Today { // only flush entries from previous days
+			if sfMap, ok := temp.(*SpiderFlow); ok {
+				arr = append(arr, map[string]interface{}{
+					"spidercode": date[1],
+					"date":       date[0],
+					"flow":       sfMap.Flow,
+					"site":       sfMap.Site,
+					"channel":    sfMap.Channel,
+					"modifyuser": sfMap.ModifyUser,
+					"comeintime": time.Now().Unix(),
+				})
+				SpiderFlowMap.Delete(key) // drop flushed entries from the map
+			}
+		}
+		return true
+	})
+	if len(arr) > 0 {
+		Mgo.SaveBulk("spider_flow", arr...)
+	}
+	logger.Info("统计流量信息完成...", Today)
+}