Browse Source

修改统计

maxiaoshan 3 years ago
parent
commit
0b018da055
3 changed files with 336 additions and 30 deletions
  1. 240 0
      src/logs/task.log
  2. 17 12
      src/luatask/sitecount.go
  3. 79 18
      src/luatask/task.go

+ 240 - 0
src/logs/task.log

@@ -702517,3 +702517,243 @@
 2022/06/17 11:24:34 sitecount.go:132: debug  23500
 2022/06/17 11:24:34 sitecount.go:132: debug  23600
 2022/06/17 11:24:34 sitecount.go:137: debug  统计采集量luacodeinfo完成...
+2022/06/21 18:55:02 sitecount.go:88: info  统计采集量luacodeinfo开始...
+2022/06/21 18:55:02 sitecount.go:136: debug  0
+2022/06/21 18:55:02 sitecount.go:136: debug  100
+2022/06/21 18:55:02 sitecount.go:136: debug  200
+2022/06/21 18:55:02 sitecount.go:136: debug  300
+2022/06/21 18:55:02 sitecount.go:136: debug  400
+2022/06/21 18:55:02 sitecount.go:136: debug  500
+2022/06/21 18:55:02 sitecount.go:136: debug  600
+2022/06/21 18:55:02 sitecount.go:136: debug  700
+2022/06/21 18:55:02 sitecount.go:136: debug  800
+2022/06/21 18:55:02 sitecount.go:136: debug  900
+2022/06/21 18:55:02 sitecount.go:136: debug  1000
+2022/06/21 18:55:02 sitecount.go:136: debug  1100
+2022/06/21 18:55:02 sitecount.go:136: debug  1200
+2022/06/21 18:55:02 sitecount.go:136: debug  1300
+2022/06/21 18:55:02 sitecount.go:136: debug  1400
+2022/06/21 18:55:02 sitecount.go:136: debug  1500
+2022/06/21 18:55:02 sitecount.go:136: debug  1600
+2022/06/21 18:55:02 sitecount.go:136: debug  1700
+2022/06/21 18:55:02 sitecount.go:136: debug  1800
+2022/06/21 18:55:02 sitecount.go:136: debug  1900
+2022/06/21 18:55:02 sitecount.go:136: debug  2000
+2022/06/21 18:55:02 sitecount.go:136: debug  2100
+2022/06/21 18:55:02 sitecount.go:136: debug  2200
+2022/06/21 18:55:02 sitecount.go:136: debug  2300
+2022/06/21 18:55:02 sitecount.go:136: debug  2400
+2022/06/21 18:55:02 sitecount.go:136: debug  2500
+2022/06/21 18:55:02 sitecount.go:136: debug  2600
+2022/06/21 18:55:02 sitecount.go:136: debug  2700
+2022/06/21 18:55:02 sitecount.go:136: debug  2800
+2022/06/21 18:55:02 sitecount.go:136: debug  2900
+2022/06/21 18:55:02 sitecount.go:136: debug  3000
+2022/06/21 18:55:02 sitecount.go:136: debug  3100
+2022/06/21 18:55:02 sitecount.go:136: debug  3200
+2022/06/21 18:55:02 sitecount.go:136: debug  3300
+2022/06/21 18:55:02 sitecount.go:136: debug  3400
+2022/06/21 18:55:02 sitecount.go:136: debug  3500
+2022/06/21 18:55:02 sitecount.go:136: debug  3600
+2022/06/21 18:55:02 sitecount.go:136: debug  3700
+2022/06/21 18:55:02 sitecount.go:136: debug  3800
+2022/06/21 18:55:02 sitecount.go:136: debug  3900
+2022/06/21 18:55:02 sitecount.go:136: debug  4000
+2022/06/21 18:55:02 sitecount.go:136: debug  4100
+2022/06/21 18:55:02 sitecount.go:136: debug  4200
+2022/06/21 18:55:02 sitecount.go:136: debug  4300
+2022/06/21 18:55:02 sitecount.go:136: debug  4400
+2022/06/21 18:55:02 sitecount.go:136: debug  4500
+2022/06/21 18:55:02 sitecount.go:136: debug  4600
+2022/06/21 18:55:02 sitecount.go:136: debug  4700
+2022/06/21 18:55:02 sitecount.go:136: debug  4800
+2022/06/21 18:55:02 sitecount.go:136: debug  4900
+2022/06/21 18:55:02 sitecount.go:136: debug  5000
+2022/06/21 18:55:02 sitecount.go:136: debug  5100
+2022/06/21 18:55:02 sitecount.go:136: debug  5200
+2022/06/21 18:55:02 sitecount.go:136: debug  5300
+2022/06/21 18:55:02 sitecount.go:136: debug  5400
+2022/06/21 18:55:02 sitecount.go:136: debug  5500
+2022/06/21 18:55:02 sitecount.go:136: debug  5600
+2022/06/21 18:55:02 sitecount.go:136: debug  5700
+2022/06/21 18:55:02 sitecount.go:136: debug  5800
+2022/06/21 18:55:02 sitecount.go:136: debug  5900
+2022/06/21 18:55:02 sitecount.go:136: debug  6000
+2022/06/21 18:55:02 sitecount.go:136: debug  6100
+2022/06/21 18:55:02 sitecount.go:136: debug  6200
+2022/06/21 18:55:02 sitecount.go:136: debug  6300
+2022/06/21 18:55:02 sitecount.go:136: debug  6400
+2022/06/21 18:55:02 sitecount.go:136: debug  6500
+2022/06/21 18:55:02 sitecount.go:136: debug  6600
+2022/06/21 18:55:02 sitecount.go:136: debug  6700
+2022/06/21 18:55:02 sitecount.go:136: debug  6800
+2022/06/21 18:55:02 sitecount.go:136: debug  6900
+2022/06/21 18:55:02 sitecount.go:136: debug  7000
+2022/06/21 18:55:02 sitecount.go:136: debug  7100
+2022/06/21 18:55:02 sitecount.go:136: debug  7200
+2022/06/21 18:55:02 sitecount.go:136: debug  7300
+2022/06/21 18:55:02 sitecount.go:136: debug  7400
+2022/06/21 18:55:02 sitecount.go:136: debug  7500
+2022/06/21 18:55:02 sitecount.go:136: debug  7600
+2022/06/21 18:55:02 sitecount.go:136: debug  7700
+2022/06/21 18:55:02 sitecount.go:136: debug  7800
+2022/06/21 18:55:02 sitecount.go:136: debug  7900
+2022/06/21 18:55:02 sitecount.go:136: debug  8000
+2022/06/21 18:55:02 sitecount.go:136: debug  8100
+2022/06/21 18:55:02 sitecount.go:136: debug  8200
+2022/06/21 18:55:02 sitecount.go:136: debug  8300
+2022/06/21 18:55:02 sitecount.go:136: debug  8400
+2022/06/21 18:55:02 sitecount.go:136: debug  8500
+2022/06/21 18:55:02 sitecount.go:136: debug  8600
+2022/06/21 18:55:02 sitecount.go:136: debug  8700
+2022/06/21 18:55:02 sitecount.go:136: debug  8800
+2022/06/21 18:55:02 sitecount.go:136: debug  8900
+2022/06/21 18:55:02 sitecount.go:136: debug  9000
+2022/06/21 18:55:02 sitecount.go:136: debug  9100
+2022/06/21 18:55:02 sitecount.go:136: debug  9200
+2022/06/21 18:55:02 sitecount.go:136: debug  9300
+2022/06/21 18:55:02 sitecount.go:136: debug  9400
+2022/06/21 18:55:02 sitecount.go:136: debug  9500
+2022/06/21 18:55:02 sitecount.go:136: debug  9600
+2022/06/21 18:55:02 sitecount.go:136: debug  9700
+2022/06/21 18:55:02 sitecount.go:136: debug  9800
+2022/06/21 18:55:02 sitecount.go:136: debug  9900
+2022/06/21 18:55:02 sitecount.go:136: debug  10000
+2022/06/21 18:55:02 sitecount.go:136: debug  10100
+2022/06/21 18:55:02 sitecount.go:136: debug  10200
+2022/06/21 18:55:02 sitecount.go:136: debug  10300
+2022/06/21 18:55:02 sitecount.go:136: debug  10400
+2022/06/21 18:55:02 sitecount.go:136: debug  10500
+2022/06/21 18:55:02 sitecount.go:136: debug  10600
+2022/06/21 18:55:02 sitecount.go:136: debug  10700
+2022/06/21 18:55:02 sitecount.go:136: debug  10800
+2022/06/21 18:55:02 sitecount.go:136: debug  10900
+2022/06/21 18:55:02 sitecount.go:136: debug  11000
+2022/06/21 18:55:02 sitecount.go:136: debug  11100
+2022/06/21 18:55:02 sitecount.go:136: debug  11200
+2022/06/21 18:55:02 sitecount.go:136: debug  11300
+2022/06/21 18:55:02 sitecount.go:136: debug  11400
+2022/06/21 18:55:02 sitecount.go:136: debug  11500
+2022/06/21 18:55:02 sitecount.go:136: debug  11600
+2022/06/21 18:55:02 sitecount.go:136: debug  11700
+2022/06/21 18:55:02 sitecount.go:136: debug  11800
+2022/06/21 18:55:02 sitecount.go:136: debug  11900
+2022/06/21 18:55:02 sitecount.go:136: debug  12000
+2022/06/21 18:55:02 sitecount.go:136: debug  12100
+2022/06/21 18:55:02 sitecount.go:136: debug  12200
+2022/06/21 18:55:02 sitecount.go:136: debug  12300
+2022/06/21 18:55:02 sitecount.go:136: debug  12400
+2022/06/21 18:55:02 sitecount.go:136: debug  12500
+2022/06/21 18:55:02 sitecount.go:136: debug  12600
+2022/06/21 18:55:02 sitecount.go:136: debug  12700
+2022/06/21 18:55:02 sitecount.go:136: debug  12800
+2022/06/21 18:55:02 sitecount.go:136: debug  12900
+2022/06/21 18:55:02 sitecount.go:136: debug  13000
+2022/06/21 18:55:02 sitecount.go:136: debug  13100
+2022/06/21 18:55:02 sitecount.go:136: debug  13200
+2022/06/21 18:55:02 sitecount.go:136: debug  13300
+2022/06/21 18:55:02 sitecount.go:136: debug  13400
+2022/06/21 18:55:02 sitecount.go:136: debug  13500
+2022/06/21 18:55:02 sitecount.go:136: debug  13600
+2022/06/21 18:55:02 sitecount.go:136: debug  13700
+2022/06/21 18:55:02 sitecount.go:136: debug  13800
+2022/06/21 18:55:02 sitecount.go:136: debug  13900
+2022/06/21 18:55:02 sitecount.go:136: debug  14000
+2022/06/21 18:55:02 sitecount.go:136: debug  14100
+2022/06/21 18:55:02 sitecount.go:136: debug  14200
+2022/06/21 18:55:02 sitecount.go:136: debug  14300
+2022/06/21 18:55:02 sitecount.go:136: debug  14400
+2022/06/21 18:55:02 sitecount.go:136: debug  14500
+2022/06/21 18:55:02 sitecount.go:136: debug  14600
+2022/06/21 18:55:02 sitecount.go:136: debug  14700
+2022/06/21 18:55:02 sitecount.go:136: debug  14800
+2022/06/21 18:55:02 sitecount.go:136: debug  14900
+2022/06/21 18:55:02 sitecount.go:136: debug  15000
+2022/06/21 18:55:02 sitecount.go:136: debug  15100
+2022/06/21 18:55:02 sitecount.go:136: debug  15200
+2022/06/21 18:55:02 sitecount.go:136: debug  15300
+2022/06/21 18:55:02 sitecount.go:136: debug  15400
+2022/06/21 18:55:02 sitecount.go:136: debug  15500
+2022/06/21 18:55:02 sitecount.go:136: debug  15600
+2022/06/21 18:55:02 sitecount.go:136: debug  15700
+2022/06/21 18:55:02 sitecount.go:136: debug  15800
+2022/06/21 18:55:02 sitecount.go:136: debug  15900
+2022/06/21 18:55:02 sitecount.go:136: debug  16000
+2022/06/21 18:55:02 sitecount.go:136: debug  16100
+2022/06/21 18:55:02 sitecount.go:136: debug  16200
+2022/06/21 18:55:03 sitecount.go:136: debug  16300
+2022/06/21 18:55:03 sitecount.go:136: debug  16400
+2022/06/21 18:55:03 sitecount.go:136: debug  16500
+2022/06/21 18:55:03 sitecount.go:136: debug  16600
+2022/06/21 18:55:03 sitecount.go:136: debug  16700
+2022/06/21 18:55:03 sitecount.go:136: debug  16800
+2022/06/21 18:55:03 sitecount.go:136: debug  16900
+2022/06/21 18:55:03 sitecount.go:136: debug  17000
+2022/06/21 18:55:03 sitecount.go:136: debug  17100
+2022/06/21 18:55:03 sitecount.go:136: debug  17200
+2022/06/21 18:55:03 sitecount.go:136: debug  17300
+2022/06/21 18:55:03 sitecount.go:136: debug  17400
+2022/06/21 18:55:03 sitecount.go:136: debug  17500
+2022/06/21 18:55:03 sitecount.go:136: debug  17600
+2022/06/21 18:55:03 sitecount.go:136: debug  17700
+2022/06/21 18:55:03 sitecount.go:136: debug  17800
+2022/06/21 18:55:03 sitecount.go:136: debug  17900
+2022/06/21 18:55:03 sitecount.go:136: debug  18000
+2022/06/21 18:55:03 sitecount.go:136: debug  18100
+2022/06/21 18:55:03 sitecount.go:136: debug  18200
+2022/06/21 18:55:03 sitecount.go:136: debug  18300
+2022/06/21 18:55:03 sitecount.go:136: debug  18400
+2022/06/21 18:55:03 sitecount.go:136: debug  18500
+2022/06/21 18:55:03 sitecount.go:136: debug  18600
+2022/06/21 18:55:03 sitecount.go:136: debug  18700
+2022/06/21 18:55:03 sitecount.go:136: debug  18800
+2022/06/21 18:55:03 sitecount.go:136: debug  18900
+2022/06/21 18:55:03 sitecount.go:136: debug  19000
+2022/06/21 18:55:03 sitecount.go:136: debug  19100
+2022/06/21 18:55:03 sitecount.go:136: debug  19200
+2022/06/21 18:55:03 sitecount.go:136: debug  19300
+2022/06/21 18:55:03 sitecount.go:136: debug  19400
+2022/06/21 18:55:03 sitecount.go:136: debug  19500
+2022/06/21 18:55:03 sitecount.go:136: debug  19600
+2022/06/21 18:55:03 sitecount.go:136: debug  19700
+2022/06/21 18:55:03 sitecount.go:136: debug  19800
+2022/06/21 18:55:03 sitecount.go:136: debug  19900
+2022/06/21 18:55:03 sitecount.go:136: debug  20000
+2022/06/21 18:55:03 sitecount.go:136: debug  20100
+2022/06/21 18:55:03 sitecount.go:136: debug  20200
+2022/06/21 18:55:03 sitecount.go:136: debug  20300
+2022/06/21 18:55:03 sitecount.go:136: debug  20400
+2022/06/21 18:55:03 sitecount.go:136: debug  20500
+2022/06/21 18:55:03 sitecount.go:136: debug  20600
+2022/06/21 18:55:03 sitecount.go:136: debug  20700
+2022/06/21 18:55:03 sitecount.go:136: debug  20800
+2022/06/21 18:55:03 sitecount.go:136: debug  20900
+2022/06/21 18:55:03 sitecount.go:136: debug  21000
+2022/06/21 18:55:03 sitecount.go:136: debug  21100
+2022/06/21 18:55:03 sitecount.go:136: debug  21200
+2022/06/21 18:55:03 sitecount.go:136: debug  21300
+2022/06/21 18:55:03 sitecount.go:136: debug  21400
+2022/06/21 18:55:03 sitecount.go:136: debug  21500
+2022/06/21 18:55:03 sitecount.go:136: debug  21600
+2022/06/21 18:55:03 sitecount.go:136: debug  21700
+2022/06/21 18:55:03 sitecount.go:136: debug  21800
+2022/06/21 18:55:03 sitecount.go:136: debug  21900
+2022/06/21 18:55:03 sitecount.go:136: debug  22000
+2022/06/21 18:55:03 sitecount.go:136: debug  22100
+2022/06/21 18:55:03 sitecount.go:136: debug  22200
+2022/06/21 18:55:03 sitecount.go:136: debug  22300
+2022/06/21 18:55:03 sitecount.go:136: debug  22400
+2022/06/21 18:55:03 sitecount.go:136: debug  22500
+2022/06/21 18:55:03 sitecount.go:136: debug  22600
+2022/06/21 18:55:03 sitecount.go:136: debug  22700
+2022/06/21 18:55:03 sitecount.go:136: debug  22800
+2022/06/21 18:55:03 sitecount.go:136: debug  22900
+2022/06/21 18:55:03 sitecount.go:136: debug  23000
+2022/06/21 18:55:03 sitecount.go:136: debug  23100
+2022/06/21 18:55:03 sitecount.go:136: debug  23200
+2022/06/21 18:55:03 sitecount.go:136: debug  23300
+2022/06/21 18:55:03 sitecount.go:136: debug  23400
+2022/06/21 18:55:03 sitecount.go:136: debug  23500
+2022/06/21 18:55:03 sitecount.go:136: debug  23600
+2022/06/21 18:55:03 sitecount.go:136: debug  23700
+2022/06/21 18:55:03 sitecount.go:141: debug  统计采集量luacodeinfo完成...

+ 17 - 12
src/luatask/sitecount.go

@@ -16,18 +16,19 @@ import (
 )
 
 type SiteInfo struct {
-	Site            string `json:""`                //站点
-	Num             int    `json:"averagenum"`      //每日网站发布平均量
-	Modifyuser      string `json:"modifyuser"`      //维护人
-	State           string `json:"state"`           //网站状态
-	Domain          string `json:"domain"`          //域名
-	Stype           string `json:"stype"`           //网站类型
-	Platform        string `json:"platform"`        //所属平台
-	Coverage        string `json:"coverage"`        //覆盖率
-	ListAllNum      int    `json:"listallnum"`      //href去重,当天采集数据量
-	ListSuccessNum  int    `json:"listsuccessnum"`  //href去重,当天采集成功数据量
-	PTimeSuccessNum int    `json:"ptimesuccessnum"` //href去重,当天发布采集成功数据量
-	Comeintime      int64  `json:"comeintime"`      //href去重,当天发布采集成功数据量
+	Site              string `json:""`                  //站点
+	Num               int    `json:"averagenum"`        //每日网站发布平均量
+	Modifyuser        string `json:"modifyuser"`        //维护人
+	State             string `json:"state"`             //网站状态
+	Domain            string `json:"domain"`            //域名
+	Stype             string `json:"stype"`             //网站类型
+	Platform          string `json:"platform"`          //所属平台
+	Coverage          string `json:"coverage"`          //覆盖率
+	ListAllNum        int    `json:"listallnum"`        //href去重,当天采集数据量
+	ListSuccessNum    int    `json:"listsuccessnum"`    //href去重,当天采集成功数据量
+	PTimeSuccessNum   int    `json:"ptimesuccessnum"`   //href去重,当天发布采集成功数据量
+	PTimeSuccessDbNum int    `json:"ptimesuccessdbnum"` //href去重,data_bak当天发布采集成功数据量
+	Comeintime        int64  `json:"comeintime"`        //href去重,当天发布采集成功数据量
 }
 
 var SiteInfoModel = `{
@@ -94,6 +95,7 @@ func GetAllSpidercodeNum(siteInfoMap map[string]*SiteInfo) {
 	}
 	fields := map[string]interface{}{
 		"repeatptimesuccessnum":    1,
+		"repeatptimesuccessdbnum":  1,
 		"repeatdownloadallnum":     1,
 		"repeatdownloadsuccessnum": 1,
 		"site":                     1,
@@ -117,6 +119,7 @@ func GetAllSpidercodeNum(siteInfoMap map[string]*SiteInfo) {
 			repeatdownloadallnum := qu.IntAll(tmp["repeatdownloadallnum"])
 			repeatdownloadsuccessnum := qu.IntAll(tmp["repeatdownloadsuccessnum"])
 			repeatptimesuccessnum := qu.IntAll(tmp["repeatptimesuccessnum"])
+			repeatptimesuccessdbnum := qu.IntAll(tmp["repeatptimesuccessdbnum"])
 			if platform == "python" {
 				site = site + "(python)"
 			}
@@ -125,6 +128,7 @@ func GetAllSpidercodeNum(siteInfoMap map[string]*SiteInfo) {
 				info.ListAllNum += repeatdownloadallnum
 				info.ListSuccessNum += repeatdownloadsuccessnum
 				info.PTimeSuccessNum += repeatptimesuccessnum
+				info.PTimeSuccessDbNum += repeatptimesuccessdbnum
 			}
 			lock.Unlock()
 		}(tmp)
@@ -158,6 +162,7 @@ func GetSiteInfoExcel(allSpiderInfo map[string]*SiteInfo) {
 		row.AddCell().SetValue(info.ListAllNum)
 		row.AddCell().SetValue(info.ListSuccessNum)
 		row.AddCell().SetValue(info.PTimeSuccessNum)
+		row.AddCell().SetValue(info.PTimeSuccessDbNum)
 		coverage := float64(info.PTimeSuccessNum) / float64(info.Num)
 		fill := &xlsx.Fill{
 			PatternType: "solid",

+ 79 - 18
src/luatask/task.go

@@ -24,6 +24,7 @@ const FailedNumLimit = 3
 var CodeInfoMap map[string]*Spider
 var AllHref map[string]string
 var SameDayHref map[string]string
+var DataBakAllHref map[string]string
 var StateFeedBackErr = map[int]string{
 	0:   "timeout",
 	200: "analysis",
@@ -102,10 +103,11 @@ type Spider struct {
 	RepeatDownloadFailedNum  int `json:"repeatdownloadfailednum"`  //下载失败量
 	RepeatNoDownloadNum      int `json:"repeatnodownloadnum"`      //未下载量
 	//基于comeintime去重的当天下载量
-	RepeatPTimeAllNum        int `json:"repeatptimeallnum"`        //当天总下载量
-	RepeatPTimeSuccessNum    int `json:"repeatptimesuccessnum"`    //当天下载成功量
-	RepeatPTimeFailedNum     int `json:"repeatptimefailednum"`     //当天下载失败量
-	RepeatPTimeNoDownloadNum int `json:"repeatptimenodownloadnum"` //当天未下载量
+	RepeatPTimeAllNum            int `json:"repeatptimeallnum"`        //当天总下载量
+	RepeatPTimeSuccessNum        int `json:"repeatptimesuccessnum"`    //当天下载成功量
+	RepeatPTimeSuccessDataBakNum int `json:"repeatptimesuccessdbnum"`  //data_bak当天发布数据量
+	RepeatPTimeFailedNum         int `json:"repeatptimefailednum"`     //当天下载失败量
+	RepeatPTimeNoDownloadNum     int `json:"repeatptimenodownloadnum"` //当天未下载量
 
 	ListDownloadAllTimes int                   `json:"listdownloadalltimes"` //一天内列表页总下载次数
 	ListOhPercentTimes   int                   `json:"listohpercenttimes"`   //列表页采集百分百次数
@@ -175,9 +177,9 @@ func StartTask() {
 	GetSpiderHighListDownloadNum() //统计spider_highlistdata爬虫列表页下载量、下载失败量、未下载量
 	GetSpiderListDownloadNum()     //统计spider_listdata爬虫列表页下载量、下载失败量、未下载量
 	GetSpiderDataBakDownloadNum()  //统计data_bak爬虫下载量
-	GetSpiderDownloadRateDataNew()
-	GetSpiderWarnErrData()
-	GetPythonWarnErrData()
+	GetSpiderDownloadRateDataNew() //下载率
+	GetSpiderWarnErrData()         //异常信息
+	GetPythonWarnErrData()         //python相关
 	//SaveCodeInfo()
 	CreateTaskProcess()
 	// GetDownloadNumber() //统计下载量
@@ -191,6 +193,7 @@ func InitInfo() {
 	CodeInfoMap = map[string]*Spider{} //初始化
 	AllHref = map[string]string{}
 	SameDayHref = map[string]string{}
+	DataBakAllHref = map[string]string{}
 	UserTaskNum = map[string]map[string]int{}
 	StartTime, EndTime = util.GetWorkDayTimeUnix()
 	Publishtime = qu.FormatDateByInt64(&StartTime, qu.Date_Short_Layout)
@@ -697,6 +700,8 @@ func GetSpiderDataBakDownloadNum() {
 	}
 	fields := map[string]interface{}{
 		"spidercode": 1,
+		"href":       1,
+		"site":       1,
 	}
 	lock := &sync.Mutex{}
 	wg := &sync.WaitGroup{}
@@ -712,9 +717,16 @@ func GetSpiderDataBakDownloadNum() {
 				wg.Done()
 			}()
 			code := qu.ObjToString(tmp["spidercode"])
+			href := qu.ObjToString(tmp["href"])
+			site := qu.ObjToString(tmp["site"])
 			lock.Lock()
 			defer lock.Unlock()
 			if sp := CodeInfoMap[code]; sp != nil {
+				//单独统计data_bak每个爬虫当天发布的数据量
+				if DataBakAllHref[href] != site {
+					sp.RepeatPTimeSuccessDataBakNum++
+					DataBakAllHref[href] = site
+				}
 				if sp.DownloadAllNum == 0 || sp.PTimeAllNum != 0 {
 					return
 				}
@@ -730,7 +742,7 @@ func GetSpiderDataBakDownloadNum() {
 		tmp = map[string]interface{}{}
 	}
 	wg.Wait()
-
+	DataBakAllHref = map[string]string{}
 	//wg := &sync.WaitGroup{}
 	//ch := make(chan bool, 5)
 	//n := 0
@@ -1185,14 +1197,63 @@ func GetSpiderWarnErrData() {
 
 //汇总python错误信息数据
 func GetPythonWarnErrData() {
-	GetPythonDownloadNum() //统计总下载量
-	GetPythonErrData()     //统计异常信息
+	GetPythonListDownloadNum()   //统计列表页采集量
+	GetPythonDetailDownloadNum() //统计data_bak总下载量
+	GetPythonErrData()           //统计异常信息
 }
 
-//python统计总下载量
-func GetPythonDownloadNum() {
+//python统计列表页采集量
+func GetPythonListDownloadNum() {
+	defer qu.Catch()
+	logger.Debug("python列表页数据下载量统计开始...")
+	sess := util.MgoPy.GetMgoConn()
+	defer util.MgoPy.DestoryMongoConn(sess)
+	query := map[string]interface{}{
+		"runtime": Publishtime,
+		"rel_count": map[string]interface{}{
+			"$gt": 0,
+		},
+	}
+	fields := map[string]interface{}{
+		"spidercode": 1,
+		"rel_count":  1,
+	}
+	lock := &sync.Mutex{}
+	wg := &sync.WaitGroup{}
+	ch := make(chan bool, 5)
+	it := sess.DB(util.MgoPy.DbName).C("list").Find(&query).Select(&fields).Iter()
+	n := 0
+	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
+		wg.Add(1)
+		ch <- true
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			code := qu.ObjToString(tmp["spidercode"])
+			count := qu.IntAll(tmp["rel_count"])
+			lock.Lock()
+			if sp := CodeInfoMap[code]; sp != nil {
+				//href不去重统计
+				sp.DownloadAllNum += count
+				sp.RepeatDownloadAllNum += count
+			}
+			lock.Unlock()
+		}(tmp)
+		if n%1000 == 0 {
+			logger.Debug(n)
+		}
+		tmp = map[string]interface{}{}
+	}
+	wg.Wait()
+	logger.Debug("python数据下载量统计完成...")
+}
+
+//python三级页统计总下载量
+func GetPythonDetailDownloadNum() {
 	defer qu.Catch()
-	logger.Debug("python数据下载量统计开始...")
+	logger.Debug("python三级页数据下载量统计开始...")
 	sess := util.MgoPy.GetMgoConn()
 	defer util.MgoPy.DestoryMongoConn(sess)
 	query := map[string]interface{}{
@@ -1225,9 +1286,8 @@ func GetPythonDownloadNum() {
 			samaDay := strings.Contains(ptime, Publishtime) //判断是否是当天的数据
 			lock.Lock()
 			if sp := CodeInfoMap[code]; sp != nil {
-				//href不去重统计
-				sp.DownloadAllNum++
-				sp.RepeatDownloadAllNum++
+				//sp.DownloadAllNum++
+				//sp.RepeatDownloadAllNum++
 				if sendflag == "true" {
 					sp.DownloadSuccessNum++
 					sp.RepeatDownloadSuccessNum++
@@ -1238,6 +1298,7 @@ func GetPythonDownloadNum() {
 					if sendflag == "true" {
 						sp.PTimeSuccessNum++
 						sp.RepeatPTimeSuccessNum++
+						sp.RepeatPTimeSuccessDataBakNum++
 					}
 				}
 			}
@@ -1881,7 +1942,7 @@ func SaveCodeInfo() {
 			}
 			lock.Lock()
 			if len(arr) > 500 {
-				util.MgoE.SaveBulk("luacodeinfo", arr...)
+				util.MgoE.SaveBulk("luacodeinfo_back", arr...)
 				arr = []map[string]interface{}{}
 			}
 			lock.Unlock()
@@ -1889,7 +1950,7 @@ func SaveCodeInfo() {
 	}
 	wg.Wait()
 	if len(arr) > 0 {
-		util.MgoE.SaveBulk("luacodeinfo", arr...)
+		util.MgoE.SaveBulk("luacodeinfo_back", arr...)
 		arr = []map[string]interface{}{}
 	}
 	logger.Debug("爬虫基本信息生成完成...")