main.go 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586
  1. package main
  2. import (
  3. "fmt"
  4. "github.com/robfig/cron/v3"
  5. "go.mongodb.org/mongo-driver/bson"
  6. "go.uber.org/zap"
  7. utils "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  8. "jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
  9. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  10. "time"
  11. )
  12. var (
  13. MgoB *mongodb.MongodbSim
  14. MgoC *mongodb.MongodbSim
  15. Rest = make(map[string]interface{}, 0) //存储配置 栏目
  16. // 更新mongo
  17. //千里马对应的标讯 channel
  18. channels = []string{"招标公告", "重新招标", "意见征集", "招标预告", "信息变更", "答疑公告", "废标公告", "流标公告",
  19. "开标公示", "候选人公示", "中标通知", "合同公告", "验收合同", "违规公告", "其他公告", "预告", "公告", "变更", "结果", "其他"}
  20. channels2 = []string{"可研", "立项", "核准", "备案", "环评", "审批", "施工许可"} // 拟建数据
  21. channels3 = []string{"国土"} //产权数据
  22. //标讯数据细分,招标预告、招标公告、结果公告
  23. predictionChannels = []string{"预告", "招标预告", "意见征集"} //招标预告
  24. biddingChannels = []string{"公告", "变更", "招标公告", "重新招标", "信息变更", "答疑公告"} //招标公告
  25. resultChannels = []string{"废标公告", "流标公告", "结果", "开标公示", "候选人公示", "中标通知", "合同公告"} //结果公告
  26. Yesterday time.Time
  27. Today time.Time
  28. dataSource = make(map[string]interface{}, 0) //数据源收录指标
  29. dataCollection = make(map[string]interface{}, 0) //数据采集指标
  30. dataCompete = make(map[string]interface{}, 0) //竞品对比指标
  31. dataTime = make(map[string]interface{}, 0) //数据时效指标
  32. dataQuality = make(map[string]interface{}, 0) //数据质量指标
  33. )
  34. func main() {
  35. local, _ := time.LoadLocation("Asia/Shanghai")
  36. c := cron.New(cron.WithLocation(local), cron.WithSeconds())
  37. _, err := c.AddFunc(GF.Cron.Spec, getIndicators)
  38. if err != nil {
  39. log.Error("main", zap.Error(err))
  40. }
  41. log.Info("main", zap.String("spec", GF.Cron.Spec))
  42. c.Start()
  43. defer c.Stop()
  44. select {}
  45. }
  46. // getIndicators 获取数据指标数据
  47. func getIndicators() {
  48. // 获取昨天零点和今天零点的时间戳
  49. now := time.Now()
  50. start := GF.Cron.Start
  51. end := GF.Cron.End
  52. if start == 0 {
  53. start = -1
  54. }
  55. Yesterday = time.Date(now.Year(), now.Month(), now.Day()+start, 0, 0, 0, 0, time.Local)
  56. Today = time.Date(now.Year(), now.Month(), now.Day()+end, 0, 0, 0, 0, time.Local)
  57. dataSource = make(map[string]interface{}, 0) //数据源收录指标
  58. dataCollection = make(map[string]interface{}, 0) //数据采集指标
  59. dataCompete = make(map[string]interface{}, 0) //竞品对比指标
  60. dataTime = make(map[string]interface{}, 0) //数据时效指标
  61. dataQuality = make(map[string]interface{}, 0) //数据质量指标
  62. //1. 数据采集指标
  63. getCollection()
  64. //2.统计竞品对比指标
  65. dayOfWeek := Today.Weekday()
  66. if dayOfWeek == time.Wednesday || GF.Cron.Week {
  67. coverageA()
  68. coverageB()
  69. }
  70. //3.数据时效指标
  71. getTimeLines()
  72. //4.数据行质量合格率,暂时写死
  73. dataQuality["数据行质量合格率"] = GF.Cron.QualityRate
  74. //5.统计 数据源收录指标
  75. getCollectionData()
  76. Rest["数据源收录指标"] = dataSource
  77. Rest["数据采集指标"] = dataCollection
  78. Rest["竞品对比指标"] = dataCompete
  79. Rest["数据时效指标"] = dataTime
  80. Rest["数据质量指标"] = dataQuality
  81. Rest["日期"] = Yesterday.Format("2006-01-02")
  82. MgoB.Save("bidding_zhibiao", Rest)
  83. fmt.Println("over")
  84. }
  85. // getCollection 获取数据采集指标
  86. func getCollection() {
  87. //1.数据日采集量
  88. whereBidding := map[string]interface{}{
  89. "comeintime": map[string]interface{}{
  90. "$gt": Yesterday.Unix(),
  91. "$lte": Today.Unix(),
  92. },
  93. }
  94. biddingCount := MgoB.Count("bidding", whereBidding)
  95. if biddingCount == 0 {
  96. SendMail("数据昨日采为0", "请检查相关流程")
  97. return
  98. }
  99. dataCollection["数据采集日采集量"] = biddingCount
  100. log.Info("getCollection", zap.Int("数据日采集量", biddingCount))
  101. //2. 统计爬虫总量
  102. whereT := map[string]interface{}{
  103. "state": map[string]interface{}{
  104. "$ne": []interface{}{4, 10},
  105. },
  106. }
  107. collectAll := MgoC.Count("luaconfig", whereT)
  108. dataCollection["爬虫总量"] = collectAll
  109. log.Info("getCollection", zap.Int("爬虫总量", collectAll))
  110. //3. 爬虫异常数量
  111. whereCollectErr := map[string]interface{}{
  112. "l_comeintime": map[string]interface{}{
  113. "$gt": Yesterday.Unix(),
  114. "$lte": Today.Unix(),
  115. },
  116. }
  117. collectErrCount := MgoC.Count("task", whereCollectErr)
  118. dataCollection["爬虫日异常量"] = collectErrCount
  119. errPercentage := (float64(collectErrCount) / float64(collectAll)) * 100.0
  120. dataCollection["爬虫日异常量比例"] = fmt.Sprintf("%.2f%%", errPercentage)
  121. log.Info("getCollection", zap.Int("爬虫日异常量", collectErrCount))
  122. //4.爬虫上架时效(小时)
  123. dayOfWeek := Today.Weekday() // 获取星期几
  124. lastSunday := time.Date(Today.Year(), Today.Month(), Today.Day()-1, 0, 0, 0, 0, time.Local) //上周日
  125. lastMonday := time.Date(Today.Year(), Today.Month(), Today.Day()-7, 0, 0, 0, 0, time.Local) //上周一
  126. //4. 周一或者强制统计,出上周一到周日的 爬虫上架时效/爬虫维护时效(小时)
  127. if dayOfWeek == time.Monday || GF.Cron.Week {
  128. //4.1 爬虫上架时效(小时)
  129. whereShelves := map[string]interface{}{
  130. "comeintime": map[string]interface{}{
  131. "$gte": lastMonday.Unix(),
  132. "$lte": lastSunday.Unix(),
  133. },
  134. }
  135. shelves, _ := MgoC.Find("luaconfig", whereShelves, nil, map[string]interface{}{"code": 1, "comeintime": 1}, false, -1, -1)
  136. if len(*shelves) > 0 {
  137. shelvesCount := int64(0)
  138. shelvesTime := int64(0)
  139. for _, v := range *shelves {
  140. code := utils.ObjToString(v["code"])
  141. shelveNew, _ := MgoC.FindOne("lua_logs_auditor_new", map[string]interface{}{"code": code, "types": "审核"})
  142. if shelveNew == nil {
  143. continue
  144. } else {
  145. comeintimeNew := utils.Int64All((*shelveNew)["comeintime"])
  146. comeintime := utils.Int64All(v["comeintime"])
  147. if comeintimeNew == 0 {
  148. continue
  149. }
  150. if comeintimeNew-comeintime > 0 {
  151. shelvesTime = shelvesTime + comeintimeNew - comeintime
  152. shelvesCount++
  153. }
  154. }
  155. }
  156. if shelvesCount > 0 {
  157. dataCollection["爬虫上架时效(小时)"] = (shelvesTime / shelvesCount) / 3600
  158. log.Info("getCollection", zap.Any("爬虫上架时效", (shelvesTime/shelvesCount)/3600))
  159. } else {
  160. dataCollection["爬虫上架时效(小时)"] = ""
  161. }
  162. }
  163. //4.2 爬虫维护时效(小时)
  164. whereAuditor := map[string]interface{}{
  165. "comeintime": map[string]interface{}{
  166. "$gte": lastMonday.Unix(),
  167. "$lte": lastSunday.Unix(),
  168. },
  169. "types": "审核",
  170. }
  171. maintainCount := int64(0) //维护数量
  172. maintainTime := int64(0) //维护总时间
  173. auditors, _ := MgoC.Find("lua_logs_auditor", whereAuditor, nil, nil, false, -1, -1)
  174. if len(*auditors) > 0 {
  175. for _, v := range *auditors {
  176. code := utils.ObjToString(v["code"])
  177. shelveNew, _ := MgoC.FindOne("lua_logs_auditor_new", map[string]interface{}{"code": code, "types": "审核"})
  178. if shelveNew == nil || len(*shelveNew) == 0 {
  179. taskWhere := map[string]interface{}{
  180. "s_code": code,
  181. "i_state": 4,
  182. }
  183. tasks, _ := MgoC.Find("task", taskWhere, map[string]interface{}{"l_complete": -1}, nil, false, -1, -1)
  184. if len(*tasks) > 0 {
  185. completeTime := utils.Int64All((*tasks)[0]["l_comeintime"])
  186. comeinTime := utils.Int64All(v["comeintime"])
  187. diff := completeTime - comeinTime
  188. if diff > 0 {
  189. maintainCount++
  190. maintainTime += diff
  191. }
  192. }
  193. }
  194. }
  195. if maintainCount > 0 {
  196. dataCollection["爬虫维护时效(小时)"] = (maintainTime / maintainCount) / 3600
  197. log.Info("getCollection", zap.Any("爬虫维护时效(小时)", (maintainTime/maintainCount)/3600))
  198. } else {
  199. dataCollection["爬虫维护时效(小时)"] = ""
  200. }
  201. }
  202. }
  203. }
  204. // coverageA 统计 剑鱼对千里马覆盖率
  205. func coverageA() {
  206. //5.竞品覆盖率,每周4统计上周的数据
  207. sessC := MgoC.GetMgoConn()
  208. defer MgoC.DestoryMongoConn(sessC)
  209. //获取上周3,千里马的招标数据;然后获取标讯前后个3天,一共7天的所有数据,对比看标题或者项目名称是否存在
  210. lastWednesday := time.Date(Today.Year(), Today.Month(), Today.Day()-7, 0, 0, 0, 0, time.Local)
  211. whereQlm := map[string]interface{}{
  212. "publishtime": lastWednesday.Format("2006-01-02"),
  213. "site": "千里马",
  214. }
  215. query := sessC.DB("qlm").C("data_merge").Find(whereQlm).Select(map[string]interface{}{"title": 1, "projectname": 1, "channel": 1}).Iter()
  216. count := 0
  217. qlmData := make([]map[string]interface{}, 0) //标讯所有数据
  218. njData := make([]map[string]interface{}, 0) //拟建数据
  219. cqData := make([]map[string]interface{}, 0) //产权数据
  220. preData := make([]map[string]interface{}, 0) //招标预告数据
  221. biddingData := make([]map[string]interface{}, 0) // 招标公告数据
  222. resultData := make([]map[string]interface{}, 0) // 结果公告数据
  223. for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
  224. data := map[string]interface{}{
  225. "title": tmp["title"],
  226. "projectname": tmp["projectname"],
  227. }
  228. channel := utils.ObjToString(tmp["channel"])
  229. //标讯所有数据
  230. if IsInStringArray(channel, channels) {
  231. qlmData = append(qlmData, data)
  232. }
  233. if IsInStringArray(channel, predictionChannels) {
  234. preData = append(preData, data)
  235. }
  236. if IsInStringArray(channel, biddingChannels) {
  237. biddingData = append(biddingData, data)
  238. }
  239. if IsInStringArray(channel, resultChannels) {
  240. resultData = append(resultData, data)
  241. }
  242. //拟建数据
  243. if IsInStringArray(channel, channels2) {
  244. njData = append(njData, data)
  245. }
  246. //产权数据
  247. if IsInStringArray(channel, channels3) {
  248. cqData = append(cqData, data)
  249. }
  250. }
  251. log.Info("getIndicators", zap.Int("千里马上周三总数", count))
  252. biddingWhere := map[string]interface{}{
  253. "publishtime": map[string]interface{}{
  254. "$gte": lastWednesday.AddDate(0, 0, -3).Unix(),
  255. "$lte": lastWednesday.AddDate(0, 0, 3).Unix(),
  256. },
  257. }
  258. biddingDatas, _ := MgoB.Find("bidding", biddingWhere, nil, map[string]interface{}{"title": 1, "projectname": 1}, false, -1, -1)
  259. log.Info("coverageA", zap.Int("标讯一周总数", len(*biddingDatas)))
  260. // 将切片B中的标题和项目名称分别存储在哈希表中
  261. titlesInB, projectsInB := getUniqueFields(*biddingDatas)
  262. //5.1.1 统计 标讯-整体 数据
  263. matches := countMatches(qlmData, titlesInB, projectsInB)
  264. matchesA := map[string]interface{}{
  265. "标讯整体": map[string]interface{}{
  266. "date": lastWednesday.Format("2006-01-02"),
  267. "count": len(qlmData),
  268. "match": matches,
  269. "no-match": len(qlmData) - matches,
  270. "qlm-total": count,
  271. "rate": fmt.Sprintf("%.2f%%", float64(matches)/float64(len(qlmData))*100),
  272. },
  273. }
  274. //5.1.2 统计 标讯-招标预告 数据
  275. matchesPre := countMatches(preData, titlesInB, projectsInB)
  276. matchesA["招标预告"] = map[string]interface{}{
  277. "match": matchesPre,
  278. "no-match": len(preData) - matchesPre,
  279. "total": len(preData),
  280. "rate": fmt.Sprintf("%.2f%%", float64(matchesPre)/float64(len(preData))*100),
  281. }
  282. //5.1.3 统计 标讯-招标公告 数据
  283. matchBidding := countMatches(biddingData, titlesInB, projectsInB)
  284. matchesA["招标公告"] = map[string]interface{}{
  285. "match": matchBidding,
  286. "no-match": len(biddingData) - matchBidding,
  287. "total": len(biddingData),
  288. "rate": fmt.Sprintf("%.2f%%", float64(matchBidding)/float64(len(biddingData))*100),
  289. }
  290. //5.1.4 统计 标讯-结果公告 数据
  291. matchResult := countMatches(resultData, titlesInB, projectsInB)
  292. matchesA["结果公告"] = map[string]interface{}{
  293. "match": matchResult,
  294. "no-match": len(resultData) - matchResult,
  295. "total": len(resultData),
  296. "rate": fmt.Sprintf("%.2f%%", float64(matchResult)/float64(len(resultData))*100),
  297. }
  298. dataCompete["剑鱼对千里马覆盖率(标讯)"] = matchesA
  299. log.Info("coverageA", zap.String("剑鱼对千里马覆盖率-标讯", "处理完毕"))
  300. //5.2 拟建数据覆盖率
  301. matches2 := countMatches(njData, titlesInB, projectsInB)
  302. matchesB := map[string]interface{}{
  303. "match": matches2,
  304. "total": len(njData),
  305. "no-match": len(njData) - matches2,
  306. "date": lastWednesday.Format("2006-01-02"),
  307. "rate": fmt.Sprintf("%.2f%%", float64(matches2)/float64(len(njData))*100),
  308. }
  309. dataCompete["剑鱼对千里马覆盖率(拟建)"] = matchesB
  310. //5.3 产权数据统计
  311. matches3 := countMatches(cqData, titlesInB, projectsInB)
  312. matchesC := map[string]interface{}{
  313. "match": matches3,
  314. "total": len(cqData),
  315. "no-match": len(cqData) - matches3,
  316. "date": lastWednesday.Format("2006-01-02"),
  317. "rate": fmt.Sprintf("%.2f%%", float64(matches3)/float64(len(cqData))*100),
  318. }
  319. dataCompete["剑鱼对千里马覆盖率(产权)"] = matchesC
  320. log.Info("coverageA", zap.String("剑鱼对千里马覆盖率-产权", "处理完毕"))
  321. }
  322. // coverageB 统计 千里马对剑鱼的覆盖率
  323. func coverageB() {
  324. sessB := MgoB.GetMgoConn()
  325. defer MgoB.DestoryMongoConn(sessB)
  326. lastWednesday := time.Date(Today.Year(), Today.Month(), Today.Day()-7, 0, 0, 0, 0, time.Local)
  327. lastThursday := time.Date(Today.Year(), Today.Month(), Today.Day()-6, 0, 0, 0, 0, time.Local)
  328. whereQlm := map[string]interface{}{
  329. "publishtime": map[string]interface{}{
  330. "$gt": lastWednesday.Unix(),
  331. "$lte": lastThursday.Unix(),
  332. },
  333. }
  334. query := sessB.DB(GF.MongoB.DB).C("bidding").Find(whereQlm).Select(map[string]interface{}{"title": 1, "projectname": 1, "toptype": 1, "infoformat": 1}).Iter()
  335. count := 0
  336. qlmData := make([]map[string]interface{}, 0) //标讯所有数据
  337. njData := make([]map[string]interface{}, 0) //拟建数据
  338. cqData := make([]map[string]interface{}, 0) //产权数据
  339. preData := make([]map[string]interface{}, 0) //招标预告数据
  340. biddingData := make([]map[string]interface{}, 0) // 招标公告数据
  341. resultData := make([]map[string]interface{}, 0) // 结果公告数据
  342. for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
  343. data := map[string]interface{}{
  344. "title": tmp["title"],
  345. "projectname": tmp["projectname"],
  346. }
  347. toptype := utils.ObjToString(tmp["toptype"])
  348. //标讯所有数据
  349. if utils.IntAll(tmp["infoformat"]) == 1 {
  350. qlmData = append(qlmData, data)
  351. }
  352. if utils.IntAll(tmp["infoformat"]) == 2 {
  353. njData = append(njData, data)
  354. }
  355. if utils.IntAll(tmp["infoformat"]) == 3 {
  356. cqData = append(cqData, data)
  357. }
  358. if toptype == "预告" || toptype == "采购意向" {
  359. preData = append(preData, data)
  360. }
  361. if toptype == "招标" {
  362. biddingData = append(biddingData, data)
  363. }
  364. if toptype == "结果" {
  365. resultData = append(resultData, data)
  366. }
  367. }
  368. log.Info("coverageB", zap.Int("剑鱼上周三总数", count))
  369. biddingWhere := map[string]interface{}{
  370. "publishtime": map[string]interface{}{
  371. "$gte": lastWednesday.AddDate(0, 0, -3).Format("2006-01-02"),
  372. "$lte": lastWednesday.AddDate(0, 0, 3).Format("2006-01-02"),
  373. },
  374. }
  375. //竞品 qlm 数据库
  376. mgoQ := mongodb.MongodbSim{
  377. MongodbAddr: MgoC.MongodbAddr,
  378. DbName: "qlm",
  379. Size: 10,
  380. UserName: GF.MongoC.Username,
  381. Password: GF.MongoC.Password,
  382. Direct: GF.MongoC.Direct,
  383. }
  384. mgoQ.InitPool()
  385. biddingDatas, _ := mgoQ.Find("data_merge", biddingWhere, nil, map[string]interface{}{"title": 1, "projectname": 1}, false, -1, -1)
  386. log.Info("coverageB", zap.Int("千里马一周总数", len(*biddingDatas)))
  387. // 将切片B中的标题和项目名称分别存储在哈希表中
  388. titlesInB, projectsInB := getUniqueFields(*biddingDatas)
  389. //5.1.1 统计 标讯-整体 数据
  390. matches := countMatches(qlmData, titlesInB, projectsInB)
  391. //totalMatchesA := make(map[string]interface{}, 0) //剑鱼对千里马覆盖率(标讯)
  392. matchesA := map[string]interface{}{
  393. "标讯整体": map[string]interface{}{
  394. "date": lastWednesday.Format("2006-01-02"),
  395. "count": len(qlmData),
  396. "match": matches,
  397. "no-match": len(qlmData) - matches,
  398. "jianyu-total": count,
  399. "rate": fmt.Sprintf("%.2f%%", float64(matches)/float64(len(qlmData))*100),
  400. },
  401. }
  402. //5.1.2 统计 标讯-招标预告 数据
  403. matchesPre := countMatches(preData, titlesInB, projectsInB)
  404. matchesA["招标预告"] = map[string]interface{}{
  405. "match": matchesPre,
  406. "no-match": len(preData) - matchesPre,
  407. "total": len(preData),
  408. "rate": fmt.Sprintf("%.2f%%", float64(matchesPre)/float64(len(preData))*100),
  409. }
  410. //5.1.3 统计 标讯-招标公告 数据
  411. matchBidding := countMatches(biddingData, titlesInB, projectsInB)
  412. matchesA["招标公告"] = map[string]interface{}{
  413. "match": matchBidding,
  414. "no-match": len(biddingData) - matchBidding,
  415. "total": len(biddingData),
  416. "rate": fmt.Sprintf("%.2f%%", float64(matchBidding)/float64(len(biddingData))*100),
  417. }
  418. //5.1.4 统计 标讯-结果公告 数据
  419. matchResult := countMatches(resultData, titlesInB, projectsInB)
  420. matchesA["结果公告"] = map[string]interface{}{
  421. "match": matchResult,
  422. "no-match": len(resultData) - matchResult,
  423. "total": len(resultData),
  424. "rate": fmt.Sprintf("%.2f%%", float64(matchResult)/float64(len(resultData))*100),
  425. }
  426. dataCompete["千里马对剑鱼覆盖率(标讯)"] = matchesA
  427. log.Info("coverageB", zap.String("剑鱼对千里马覆盖率-标讯", "处理完毕"))
  428. //5.2 拟建数据覆盖率
  429. matches2 := countMatches(njData, titlesInB, projectsInB)
  430. matchesB := map[string]interface{}{
  431. "match": matches2,
  432. "total": len(njData),
  433. "no-match": len(njData) - matches2,
  434. "date": lastWednesday.Format("2006-01-02"),
  435. "rate": fmt.Sprintf("%.2f%%", float64(matches2)/float64(len(njData))*100),
  436. }
  437. dataCompete["千里马对剑鱼覆盖率(拟建)"] = matchesB
  438. //5.3 产权数据统计
  439. matches3 := countMatches(cqData, titlesInB, projectsInB)
  440. matchesC := map[string]interface{}{
  441. "match": matches3,
  442. "total": len(cqData),
  443. "no-match": len(cqData) - matches3,
  444. "date": lastWednesday.Format("2006-01-02"),
  445. "rate": fmt.Sprintf("%.2f%%", float64(matches3)/float64(len(cqData))*100),
  446. }
  447. dataCompete["千里马对剑鱼覆盖率(产权)"] = matchesC
  448. log.Info("coverageB", zap.String("千里马对剑鱼覆盖率-产权", "处理完毕"))
  449. }
  450. // getTimeLines 获取时效性指标
  451. func getTimeLines() {
  452. //6.数据整体流程均耗时(分钟)
  453. whereBidding := map[string]interface{}{
  454. "comeintime": map[string]interface{}{
  455. "$gt": Yesterday.Unix(),
  456. "$lte": Today.Unix(),
  457. },
  458. }
  459. sessB := MgoB.GetMgoConn()
  460. defer MgoB.DestoryMongoConn(sessB)
  461. fd := bson.M{"extracttype": 1, "sensitive": 1, "dataging": 1, "site": 1, "infoformat": 1, "comeintime": 1, "pici": 1, "publishtime": 1, "competehref": 1, "attach_text": 1}
  462. queryB := sessB.DB("qfw").C("bidding").Find(whereBidding).Select(fd).Iter()
  463. esCount := 0 //采集的数据需要生索引的数量
  464. biddingRealCount := 0
  465. pici_publish_totaltime := int64(0) //comeintime 和 生索引 publish 时间 差值的总和
  466. pici_comein_totaltime := int64(0) //publishtime 和 生索引 pici 时间 差值的总和
  467. for tmp := make(map[string]interface{}); queryB.Next(tmp); {
  468. if utils.IntAll(tmp["extracttype"]) != -1 && utils.ObjToString(tmp["sensitive"]) != "测试" && utils.IntAll(tmp["dataging"]) != 1 && utils.Float64All(tmp["infoformat"]) != 3 {
  469. comeintime := utils.Int64All(tmp["comeintime"])
  470. publishtime := utils.Int64All(tmp["publishtime"])
  471. pici := utils.Int64All(tmp["pici"])
  472. if pici > 0 {
  473. esCount++
  474. }
  475. if (comeintime-publishtime) < 12*60*60 && pici > 0 {
  476. biddingRealCount++
  477. diff1 := pici - publishtime
  478. diff2 := pici - comeintime
  479. pici_publish_totaltime += diff1
  480. pici_comein_totaltime += diff2
  481. }
  482. }
  483. }
  484. dataCollection["数据采集日索引量"] = esCount //数据采集指标-数据采集日索引量
  485. if biddingRealCount > 0 {
  486. pici_publish_avgtime := pici_publish_totaltime / int64(biddingRealCount)
  487. pici_comein_avgtime := pici_comein_totaltime / int64(biddingRealCount)
  488. dataTime["数据整体流程均耗时(分钟)"] = fmt.Sprintf("%.2f", float64(pici_publish_avgtime)/float64(60))
  489. dataTime["数据处理均耗时(分钟)"] = fmt.Sprintf("%.2f", float64(pici_comein_avgtime)/float64(60))
  490. dataTime["数据采集均耗时(分钟)"] = fmt.Sprintf("%.2f", float64(pici_publish_avgtime-pici_comein_avgtime)/float64(60))
  491. }
  492. }
  493. // getCollectionData 获取收录指标数据
  494. func getCollectionData() {
  495. //1.新收录数据源数量
  496. newCollectionWhere := map[string]interface{}{
  497. "comeintime": map[string]interface{}{
  498. "$gt": Yesterday.Unix(),
  499. },
  500. }
  501. newCount := MgoC.Count("site", newCollectionWhere)
  502. dataSource["新收录数据源数量"] = newCount
  503. //2.已收录数据源数量
  504. Count := MgoC.Count("site", nil)
  505. dataSource["已收录数据源数量"] = Count
  506. //3.待开发数据源数量
  507. whereConfig := map[string]interface{}{
  508. "state": 0,
  509. "comeintime": map[string]interface{}{
  510. "$gt": Yesterday.Unix(),
  511. },
  512. }
  513. unSiteCount := int64(0) //待开发数据源数量
  514. unSites, _ := MgoC.Find("luaconfig", whereConfig, nil, nil, false, -1, -1)
  515. if len(*unSites) > 0 {
  516. for _, v := range *unSites {
  517. code := utils.ObjToString(v["code"])
  518. num := MgoC.Count("lua_logs_auditor", map[string]interface{}{"code": code})
  519. if num == 0 {
  520. unSiteCount++
  521. }
  522. }
  523. }
  524. dataSource["待开发数据源数量"] = unSiteCount
  525. //4.各网站分类数据源数量
  526. // 用 map 来存储一级分类和对应的二级分类数据的数量
  527. categoryCounts := make(map[string]map[string]int)
  528. classes, _ := MgoC.Find("site", nil, nil, map[string]interface{}{"site_type": 1, "second_type": 1}, false, -1, -1)
  529. for _, v := range *classes {
  530. siteType := utils.ObjToString(v["site_type"])
  531. secondType := utils.ObjToString(v["second_type"])
  532. if _, ok := categoryCounts[siteType]; !ok {
  533. categoryCounts[siteType] = make(map[string]int)
  534. }
  535. categoryCounts[siteType][secondType]++
  536. }
  537. dataSource["网站分类数据源数量"] = categoryCounts
  538. //5.应采尽采率
  539. dataSource["应采尽采率"] = GF.Cron.CollectionRate
  540. }