Parcourir la source

新增数据总量统计提醒

maxiaoshan il y a 3 ans
Parent
commit
f7651b9dfa
5 fichiers modifiés avec 235 ajouts et 3 suppressions
  1. 7 0
      src/config.json
  2. 97 0
      src/logs/task.log
  3. 44 1
      src/luatask/sitecount.go
  4. 77 2
      src/luatask/task.go
  5. 10 0
      src/util/config.go

+ 7 - 0
src/config.json

@@ -14,6 +14,13 @@
 		"db": "py_spider",
 		"size": 5
 	},
+	"bidding": {
+		"addr": "192.168.3.207:27092",
+		"db": "qfw",
+		"size": 2,
+		"username": "",
+		"password": ""
+	},
 	"msgservers": {
 		"comm": {
 			"addr": "spdata.jianyu360.com:801",

+ 97 - 0
src/logs/task.log

@@ -702757,3 +702757,100 @@
 2022/06/21 18:55:03 sitecount.go:136: debug  23600
 2022/06/21 18:55:03 sitecount.go:136: debug  23700
 2022/06/21 18:55:03 sitecount.go:141: debug  统计采集量luacodeinfo完成...
+2022/07/04 10:39:34 task.go:175: debug  1656604800 1656691200 2022-07-01
+2022/07/04 10:39:36 task.go:257: debug  共加载线上爬虫个数: 3
+2022/07/04 10:39:36 task.go:302: debug  0
+2022/07/04 10:39:36 task.go:307: debug  爬虫基本信息准备完成... 3
+2022/07/04 10:39:39 task.go:574: debug  0
+2022/07/04 10:39:39 task.go:579: debug  统计采集量spider_highlistdata完成...
+2022/07/04 10:39:41 task.go:729: debug  0
+2022/07/04 10:39:41 task.go:736: debug  统计spider_listdata采集量完成...
+2022/07/04 10:40:28 task.go:175: debug  1656604800 1656691200 2022-07-01
+2022/07/04 10:40:38 task.go:257: debug  共加载线上爬虫个数: 3
+2022/07/04 10:40:38 task.go:302: debug  0
+2022/07/04 10:40:38 task.go:307: debug  爬虫基本信息准备完成... 3
+2022/07/04 10:40:43 task.go:574: debug  0
+2022/07/04 10:40:43 task.go:579: debug  统计采集量spider_highlistdata完成...
+2022/07/04 10:41:08 task.go:729: debug  0
+2022/07/04 10:41:08 task.go:736: debug  统计spider_listdata采集量完成...
+2022/07/04 10:42:22 task.go:175: debug  1656604800 1656691200 2022-07-01
+2022/07/04 10:42:25 task.go:257: debug  共加载线上爬虫个数: 3
+2022/07/04 10:42:25 task.go:302: debug  0
+2022/07/04 10:42:25 task.go:307: debug  爬虫基本信息准备完成... 3
+2022/07/04 10:42:36 task.go:574: debug  0
+2022/07/04 10:42:38 task.go:579: debug  统计采集量spider_highlistdata完成... map[a_hssrmyy_zbgg:0xc000047980 a_zggjzbw_pbjggs:0xc000304f00 a_zggjzbw_zbgg:0xc000047b00]
+2022/07/04 10:43:31 task.go:175: debug  1656604800 1656691200 2022-07-01
+2022/07/04 10:43:35 task.go:257: debug  共加载线上爬虫个数: 3
+2022/07/04 10:43:35 task.go:302: debug  0
+2022/07/04 10:43:35 task.go:307: debug  爬虫基本信息准备完成... 3
+2022/07/04 10:43:40 task.go:574: debug  0
+2022/07/04 10:47:54 task.go:579: debug  统计采集量spider_highlistdata完成... map[a_hssrmyy_zbgg:0xc000404300 a_zggjzbw_pbjggs:0xc000109e00 a_zggjzbw_zbgg:0xc000314180]
+2022/07/04 10:49:01 task.go:729: debug  0
+2022/07/04 10:49:01 task.go:736: debug  统计spider_listdata采集量完成... map[a_hssrmyy_zbgg:0xc000404300 a_zggjzbw_pbjggs:0xc000109e00 a_zggjzbw_zbgg:0xc000314180]
+2022/07/04 10:49:23 task.go:175: debug  1656604800 1656691200 2022-07-01
+2022/07/04 10:49:26 task.go:257: debug  共加载线上爬虫个数: 3
+2022/07/04 10:49:26 task.go:302: debug  0
+2022/07/04 10:49:26 task.go:307: debug  爬虫基本信息准备完成... 3
+2022/07/04 10:49:39 task.go:574: debug  0
+2022/07/04 10:50:00 task.go:579: debug  统计采集量spider_highlistdata完成... map[a_hssrmyy_zbgg:0xc000047200 a_zggjzbw_pbjggs:0xc000409500 a_zggjzbw_zbgg:0xc000047380]
+2022/07/04 10:50:00 task.go:729: debug  0
+2022/07/04 10:50:00 task.go:736: debug  统计spider_listdata采集量完成... map[a_hssrmyy_zbgg:0xc000047200 a_zggjzbw_pbjggs:0xc000409500 a_zggjzbw_zbgg:0xc000047380]
+2022/07/04 10:50:13 task.go:175: debug  1656604800 1656691200 2022-07-01
+2022/07/04 10:50:27 task.go:175: debug  1656604800 1656691200 2022-07-01
+2022/07/04 10:50:29 task.go:257: debug  共加载线上爬虫个数: 3
+2022/07/04 10:50:29 task.go:302: debug  0
+2022/07/04 10:50:29 task.go:307: debug  爬虫基本信息准备完成... 3
+2022/07/04 10:51:00 task.go:574: debug  0
+2022/07/04 10:51:21 task.go:579: debug  统计采集量spider_highlistdata完成... map[a_hssrmyy_zbgg:0xc00050a480 a_zggjzbw_pbjggs:0xc000305080 a_zggjzbw_zbgg:0xc000305200]
+2022/07/04 10:52:05 task.go:729: debug  0
+2022/07/04 10:53:08 task.go:736: debug  统计spider_listdata采集量完成... map[a_hssrmyy_zbgg:0xc00050a480 a_zggjzbw_pbjggs:0xc000305080 a_zggjzbw_zbgg:0xc000305200]
+2022/07/04 10:53:59 task.go:175: debug  1656604800 1656691200 2022-07-01
+2022/07/04 10:54:03 task.go:257: debug  共加载线上爬虫个数: 3
+2022/07/04 10:54:04 task.go:302: debug  0
+2022/07/04 10:54:04 task.go:307: debug  爬虫基本信息准备完成... 3
+2022/07/04 10:54:04 task.go:574: debug  0
+2022/07/04 10:54:08 task.go:579: debug  统计采集量spider_highlistdata完成...
+2022/07/04 10:54:08 task.go:729: debug  0
+2022/07/04 10:54:10 task.go:736: debug  统计spider_listdata采集量完成...
+2022/07/04 10:54:10 task.go:1984: debug  CodeInfoMap: 3
+2022/07/04 10:54:10 task.go:2031: debug  爬虫基本信息生成完成...
+2022/07/04 10:54:51 task.go:175: debug  1656604800 1656691200 2022-07-01
+2022/07/04 10:54:51 task.go:257: debug  共加载线上爬虫个数: 3
+2022/07/04 10:54:51 task.go:302: debug  0
+2022/07/04 10:54:51 task.go:307: debug  爬虫基本信息准备完成... 3
+2022/07/04 10:54:51 task.go:574: debug  0
+2022/07/04 10:54:51 task.go:579: debug  统计采集量spider_highlistdata完成...
+2022/07/04 10:54:51 task.go:729: debug  0
+2022/07/04 10:54:51 task.go:736: debug  统计spider_listdata采集量完成...
+2022/07/04 10:54:51 task.go:1984: debug  CodeInfoMap: 3
+2022/07/04 10:54:51 task.go:2031: debug  爬虫基本信息生成完成...
+2022/07/04 10:55:25 task.go:175: debug  1656604800 1656691200 2022-07-01
+2022/07/04 10:55:25 task.go:257: debug  共加载线上爬虫个数: 3
+2022/07/04 10:55:25 task.go:302: debug  0
+2022/07/04 10:55:25 task.go:307: debug  爬虫基本信息准备完成... 3
+2022/07/04 10:55:25 task.go:574: debug  0
+2022/07/04 10:55:25 task.go:579: debug  统计采集量spider_highlistdata完成...
+2022/07/04 10:55:25 task.go:729: debug  0
+2022/07/04 10:55:25 task.go:736: debug  统计spider_listdata采集量完成...
+2022/07/04 10:55:25 task.go:1984: debug  CodeInfoMap: 3
+2022/07/04 10:55:25 task.go:2032: debug  爬虫基本信息生成完成...
+2022/07/04 10:56:05 task.go:175: debug  1656604800 1656691200 2022-07-01
+2022/07/04 10:56:05 task.go:257: debug  共加载线上爬虫个数: 3
+2022/07/04 10:56:05 task.go:302: debug  0
+2022/07/04 10:56:05 task.go:307: debug  爬虫基本信息准备完成... 3
+2022/07/04 10:56:05 task.go:574: debug  0
+2022/07/04 10:56:05 task.go:579: debug  统计采集量spider_highlistdata完成...
+2022/07/04 10:56:05 task.go:729: debug  0
+2022/07/04 10:56:05 task.go:736: debug  统计spider_listdata采集量完成...
+2022/07/04 10:56:05 task.go:1984: debug  CodeInfoMap: 3
+2022/07/04 10:56:05 task.go:2032: debug  爬虫基本信息生成完成...
+2022/07/04 10:57:02 task.go:175: debug  1656604800 1656691200 2022-07-01
+2022/07/04 10:57:02 task.go:257: debug  共加载线上爬虫个数: 3
+2022/07/04 10:57:02 task.go:302: debug  0
+2022/07/04 10:57:02 task.go:307: debug  爬虫基本信息准备完成... 3
+2022/07/04 10:57:02 task.go:574: debug  0
+2022/07/04 10:57:02 task.go:579: debug  统计采集量spider_highlistdata完成...
+2022/07/04 10:57:02 task.go:729: debug  0
+2022/07/04 10:57:02 task.go:736: debug  统计spider_listdata采集量完成...
+2022/07/04 10:57:02 task.go:1984: debug  CodeInfoMap: 3
+2022/07/04 10:57:02 task.go:2032: debug  爬虫基本信息生成完成...

+ 44 - 1
src/luatask/sitecount.go

@@ -38,6 +38,32 @@ var SiteInfoModel = `{
     }
 }`
 
+// Run-wide totals reported by SendLuaPythonAllNum.
+//
+// The *ListDownload* counters are updated with atomic.AddInt64 from the
+// per-spider RepeatDownloadAllNum / RepeatDownloadSuccessNum values: spiders
+// whose Platform is "golua平台" feed the Lua counters, every other platform
+// feeds the Python counters. The *BiddingDownload* counters are filled by
+// GetBiddingCount.
+// NOTE(review): both CreateTaskProcess and SaveCodeInfo add to the list
+// counters — confirm that a single run does not count each spider twice.
+var LuaListDownloadAllNum int64
+var LuaListDownloadSuccessAllNum int64
+var LuaBiddingDownloadAllNum int64
+var PythonListDownloadAllNum int64
+var PythonListDownloadSuccessAllNum int64
+var PythonBiddingDownloadAllNum int64
+
+// LuaPythonNumModel is a JSON template for a "text" type message with one %s
+// placeholder for the content. It is not referenced in the visible part of
+// this change.
+var LuaPythonNumModel = `{
+    "msgtype": "text",
+    "text": {
+		"content": "%s"
+	}
+}`
+
+// MarkdownModel is a JSON template for a "markdown" type message; the single
+// %s placeholder receives the already-formatted markdown content.
+var MarkdownModel = `{
+    "msgtype": "markdown",
+    "markdown": {
+        "content": "%s"
+    }
+}`
+
+// NumContentModel is the per-platform markdown fragment. Placeholders, in
+// order: platform name (%s), list-page download total (%d), list-page
+// download success total (%d), bidding total (%d).
+// NOTE(review): this is a raw string, so \" and \n are literal backslash
+// sequences left for the JSON consumer to decode, while the physical line
+// breaks are real newline characters that end up inside the JSON string once
+// the fragment is substituted into MarkdownModel. Only the last two lines
+// carry an explicit \n. Confirm the webhook accepts unescaped newlines inside
+// a JSON string (RFC 8259 requires control characters to be escaped).
+var NumContentModel = `
+     >平台:<font color=\"warning\">%s</font>
+     >列表页采集量:<font color=\"warning\">%d个</font>
+     >列表页采集成功量:<font color=\"warning\">%d个</font>\n
+     >Bidding成功量:<font color=\"warning\">%d个</font>\n
+`
+
 //var AllHref map[string]string
 
 //重点网站每日采集量统计
@@ -81,7 +107,6 @@ func SendInfoToWxWork_SiteDataCount() {
 	//6、汇总excel
 	//GetSiteInfoExcel(siteInfoMap, siteInfoMap_Back, allSpiderMap)
 	GetSiteInfoExcel(allSpiderMap)
-	//二、企业微信提醒
 }
 func GetAllSpidercodeNum(siteInfoMap map[string]*SiteInfo) {
 	defer qu.Catch()
@@ -247,6 +272,24 @@ func SendSiteInfoToWxWork(file *xlsx.File) {
 	defer resp1.Body.Close()
 }
 
+// SendLuaPythonAllNum posts the run's Lua and Python totals (list-page
+// downloads, successful list-page downloads, bidding documents) to the
+// WeChat Work webhook as a single markdown message.
+//
+// It reads the package-level *AllNum counters, so it must run after the
+// functions that fill them. Failures are only logged; nothing is returned.
+func SendLuaPythonAllNum() {
+	defer qu.Catch()
+	luaContent := fmt.Sprintf(NumContentModel, "Lua", LuaListDownloadAllNum, LuaListDownloadSuccessAllNum, LuaBiddingDownloadAllNum)
+	pythonContent := fmt.Sprintf(NumContentModel, "python", PythonListDownloadAllNum, PythonListDownloadSuccessAllNum, PythonBiddingDownloadAllNum)
+	resultContent := fmt.Sprintf(MarkdownModel, luaContent+pythonContent)
+	qu.Debug(resultContent)
+	// NOTE(review): the webhook key is a credential committed to source
+	// control; consider moving it to configuration and rotating it.
+	// NOTE(review): http.Post uses http.DefaultClient, which has no timeout.
+	resp, err := http.Post(
+		"https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=97850772-88d0-4544-a2c3-6201aeddff9e",
+		"application/json",
+		bytes.NewBuffer([]byte(resultContent)),
+	)
+	if err != nil {
+		fmt.Println("request error:", err)
+		return
+	}
+	defer resp.Body.Close()
+	// A transport-level success does not mean the message was accepted, so
+	// surface any non-200 answer instead of silently treating it as delivered.
+	if resp.StatusCode != http.StatusOK {
+		qu.Debug("webhook send returned unexpected status: " + resp.Status)
+	}
+}
+
 //func GetHighListDataNum(ctime, etime int64, ptime string, siteInfoMap map[string]*SiteInfo) {
 //	defer qu.Catch()
 //	sess := util.MgoS.GetMgoConn()

+ 77 - 2
src/luatask/task.go

@@ -6,6 +6,7 @@ import (
 	qu "qfw/util"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"
 	"util"
 
@@ -173,6 +174,7 @@ func StartTask() {
 	InitInfo() //初始化时间
 	logger.Debug(StartTime, EndTime, Publishtime)
 	GetCodeBaseInfo()              //初始化爬虫基本信息
+	GetBiddingCount()              //统计bidding表爬虫采集量
 	GetCodeHeart()                 //初始化爬虫心跳信息
 	GetSpiderHighListDownloadNum() //统计spider_highlistdata爬虫列表页下载量、下载失败量、未下载量
 	GetSpiderListDownloadNum()     //统计spider_listdata爬虫列表页下载量、下载失败量、未下载量
@@ -185,6 +187,7 @@ func StartTask() {
 	// GetDownloadNumber() //统计下载量
 	//CloseTask()      //关闭任务
 	SendInfoToWxWork_SiteDataCount()
+	SendLuaPythonAllNum()
 }
 
 //初始化
@@ -248,6 +251,7 @@ func GetCodeBaseInfo() {
 		"l_uploadtime":      1,
 		"listisfilter":      1,
 		"frequencyerrtimes": 1,
+		"code":              1,
 	}
 	count := util.MgoE.Count("luaconfig", query)
 	logger.Debug("共加载线上爬虫个数:", count)
@@ -265,13 +269,14 @@ func GetCodeBaseInfo() {
 				Error: map[string]*ErrorInfo{},
 			}
 			if param_common, ok := tmp["param_common"].([]interface{}); ok && len(param_common) >= 6 {
-				sp.Code = qu.ObjToString(param_common[0])
+				//sp.Code = qu.ObjToString(param_common[0])
 				sp.Site = qu.ObjToString(param_common[1])
 				sp.Channel = qu.ObjToString(param_common[2])
 				sp.MaxPage = qu.IntAll(param_common[5])
 			} else {
 				logger.Debug("加载爬虫出错:", tmp["_id"])
 			}
+			sp.Code = qu.ObjToString(tmp["code"])
 			sp.ModifyUser = qu.ObjToString(tmp["modifyuser"])
 			sp.ModifyId = qu.ObjToString(tmp["modifyuserid"])
 			sp.AuditTime = qu.Int64All(tmp["l_uploadtime"])
@@ -302,6 +307,54 @@ func GetCodeBaseInfo() {
 	logger.Debug("爬虫基本信息准备完成...", len(CodeInfoMap))
 }
 
+// GetBiddingCount scans the "bidding" collection for documents whose
+// comeintime lies in [StartTime, EndTime) and attributes each one to a
+// platform through the spider registered in CodeInfoMap under the document's
+// spidercode.
+//
+// Spiders on "golua平台" increment LuaBiddingDownloadAllNum and spiders on
+// "python" increment PythonBiddingDownloadAllNum. Documents whose spidercode
+// is missing from CodeInfoMap, or whose spider is on any other platform, are
+// not counted. CodeInfoMap must already be populated; StartTask calls
+// GetCodeBaseInfo before this function.
+func GetBiddingCount() {
+	defer qu.Catch()
+	sess := util.MgoB.GetMgoConn()
+	defer util.MgoB.DestoryMongoConn(sess)
+	query := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$gte": StartTime,
+			"$lt":  EndTime,
+		},
+	}
+	// Only spidercode is read below, so project nothing else.
+	fields := map[string]interface{}{
+		"spidercode": 1,
+	}
+	count := util.MgoB.Count("bidding", query)
+	logger.Debug("bidding采集数据量:", count)
+	it := sess.DB(util.MgoB.DbName).C("bidding").Find(&query).Select(&fields).Iter()
+	// The per-document work is one map lookup and one increment. Doing it
+	// inline avoids spawning a goroutine per document that would only
+	// serialise on a mutex, and keeps any panic inside the goroutine that
+	// holds the deferred qu.Catch.
+	n := 0
+	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
+		if sp := CodeInfoMap[qu.ObjToString(tmp["spidercode"])]; sp != nil {
+			switch sp.Platform {
+			case "golua平台":
+				LuaBiddingDownloadAllNum++
+			case "python":
+				PythonBiddingDownloadAllNum++
+			}
+		}
+		if n%1000 == 0 {
+			logger.Debug(n)
+		}
+		// Fresh map per document so keys from the previous one cannot leak.
+		tmp = map[string]interface{}{}
+	}
+	// Next returns false both at the end of the result set and on failure;
+	// Close reports which, so a partial count is not mistaken for a full one.
+	if err := it.Close(); err != nil {
+		logger.Debug("bidding iterator error:", err)
+	}
+	logger.Debug("Bidding数据量统计完成...", LuaBiddingDownloadAllNum, PythonBiddingDownloadAllNum)
+}
+
 // GetCodeHeart 获取爬虫的心跳信息
 func GetCodeHeart() {
 	defer qu.Catch()
@@ -1632,7 +1685,18 @@ func CreateTaskProcess() {
 			}
 			//根据爬虫信息新建任务
 			CreateTask(task, spider, &upsertBulk, lock) //比对历史任务,新建任务
-			//
+			if spider.Platform == "golua平台" {
+				//列表页总下载量
+				atomic.AddInt64(&LuaListDownloadAllNum, int64(spider.RepeatDownloadAllNum))
+				//列表页总下载成功量
+				atomic.AddInt64(&LuaListDownloadSuccessAllNum, int64(spider.RepeatDownloadSuccessNum))
+			} else {
+				//列表页总下载量
+				atomic.AddInt64(&PythonListDownloadAllNum, int64(spider.RepeatDownloadAllNum))
+				//列表页总下载成功量
+				atomic.AddInt64(&PythonListDownloadSuccessAllNum, int64(spider.RepeatDownloadSuccessNum))
+			}
+
 			lock.Lock()
 			if len(arr) > 500 {
 				util.MgoE.SaveBulk("luacodeinfo", arr...)
@@ -1940,6 +2004,17 @@ func SaveCodeInfo() {
 				logger.Debug("Json UnMarshal Error", code)
 				return
 			}
+			if sp.Platform == "golua平台" {
+				//列表页总下载量
+				atomic.AddInt64(&LuaListDownloadAllNum, int64(sp.RepeatDownloadAllNum))
+				//列表页总下载成功量
+				atomic.AddInt64(&LuaListDownloadSuccessAllNum, int64(sp.RepeatDownloadSuccessNum))
+			} else {
+				//列表页总下载量
+				atomic.AddInt64(&PythonListDownloadAllNum, int64(sp.RepeatDownloadAllNum))
+				//列表页总下载成功量
+				atomic.AddInt64(&PythonListDownloadSuccessAllNum, int64(sp.RepeatDownloadSuccessNum))
+			}
 			lock.Lock()
 			if len(arr) > 500 {
 				util.MgoE.SaveBulk("luacodeinfo_back", arr...)

+ 10 - 0
src/util/config.go

@@ -12,6 +12,7 @@ var (
 	MgoE                       *mgo.MongodbSim //editor
 	MgoS                       *mgo.MongodbSim //spider
 	MgoPy                      *mgo.MongodbSim //py_spider
+	MgoB                       *mgo.MongodbSim //bidding
 	CommServers                map[string]interface{}
 	BidServers                 map[string]interface{}
 	UploadEvents               map[int]string
@@ -55,6 +56,15 @@ func InitMgo() {
 		Size:        qu.IntAll(editor["size"]),
 	}
 	MgoE.InitPool()
+	bidding := Config["bidding"].(map[string]interface{})
+	MgoB = &mgo.MongodbSim{
+		MongodbAddr: qu.ObjToString(bidding["addr"]),
+		DbName:      qu.ObjToString(bidding["db"]),
+		Size:        qu.IntAll(bidding["size"]),
+		UserName:    qu.ObjToString(bidding["username"]),
+		Password:    qu.ObjToString(bidding["password"]),
+	}
+	MgoB.InitPool()
 }
 
 func InitOther() {