package main import ( "fmt" "github.com/robfig/cron/v3" "go.mongodb.org/mongo-driver/bson" "go.uber.org/zap" utils "jygit.jydev.jianyu360.cn/data_processing/common_utils" "jygit.jydev.jianyu360.cn/data_processing/common_utils/log" "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb" "time" ) var ( MgoB *mongodb.MongodbSim MgoC *mongodb.MongodbSim Rest = make(map[string]interface{}, 0) //存储配置 栏目 //// 更新mongo //updatePool = make(chan []map[string]interface{}, 5000) //千里马对应的招标 channel columns = []string{"招标公告", "重新招标", "意见征集", "招标预告", "信息变更", "答疑公告", "废标公告", "流标公告", "开标公示", "候选人公示", "中标通知", "合同公告", "验收合同", "违规公告", "其他公告"} ) func main() { local, _ := time.LoadLocation("Asia/Shanghai") c := cron.New(cron.WithLocation(local), cron.WithSeconds()) _, err := c.AddFunc(GF.Cron.Spec, getIndicators) if err != nil { log.Error("main", zap.Error(err)) } log.Info("main", zap.String("spec", GF.Cron.Spec)) c.Start() defer c.Stop() select {} } //获取数据指标数据 func getIndicators() { // 获取昨天零点和今天零点的时间戳 now := time.Now() yesterday := time.Date(now.Year(), now.Month(), now.Day()-1, 0, 0, 0, 0, time.Local) today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.Local) //1.数据日采集量 whereBidding := map[string]interface{}{ "comeintime": map[string]interface{}{ "$gt": yesterday.Unix(), "$lte": today.Unix(), }, } biddingCount := MgoB.Count("bidding", whereBidding) if biddingCount == 0 { SendMail("数据昨日采为0", "请检查相关流程") } Rest["date"] = yesterday.Format("2006-01-02") Rest["数据日采集量"] = biddingCount log.Info("getIndicators", zap.Int("数据日采集量", biddingCount)) //2. 统计爬虫总量 whereT := map[string]interface{}{ "state": map[string]interface{}{ "$ne": []interface{}{4, 10}, }, } collectAll := MgoC.Count("luaconfig", whereT) Rest["爬虫总量"] = collectAll log.Info("getIndicators", zap.Int("爬虫总量", collectAll)) //3. 爬虫异常数量 whereCollectErr := map[string]interface{}{ "l_comeintime": map[string]interface{}{ "$gt": yesterday.Unix(), "$lte": today.Unix(), }, } collectErrCount := MgoC.Count("task", whereCollectErr) Rest["爬虫日异常量"] = collectErrCount errPercentage := (float64(collectErrCount) / float64(collectAll)) * 100.0 Rest["爬虫日异常量比例"] = fmt.Sprintf("%.2f%%", errPercentage) log.Info("getIndicators", zap.Int("爬虫日异常量", collectErrCount)) //4.爬虫上架时效(小时) // 获取星期几 dayOfWeek := now.Weekday() // 判断是否为周一,每周日 统计一次 上周 周一到周日 爬虫上架时效 if dayOfWeek == time.Sunday { lastSunday := time.Date(now.Year(), now.Month(), now.Day()-13, 0, 0, 0, 0, time.Local) lastMonday := time.Date(now.Year(), now.Month(), now.Day()-6, 0, 0, 0, 0, time.Local) //fmt.Println(lastMonday) whereShelves := map[string]interface{}{ "comeintime": map[string]interface{}{ "$gt": lastSunday.Unix(), "$lte": lastMonday.Unix(), }, } shelves, _ := MgoC.Find("luaconfig", whereShelves, nil, map[string]interface{}{"code": 1, "comeintime": 1}, false, -1, -1) if len(*shelves) > 0 { shelvesCount := int64(0) shelvesTime := int64(0) for _, v := range *shelves { code := utils.ObjToString(v["code"]) shelveNew, _ := MgoC.FindOne("lua_logs_auditor_new", map[string]interface{}{"code": code, "types": "审核"}) fmt.Println(shelveNew) if shelveNew == nil { continue } else { shelvesCount++ comeintimeNew := utils.Int64All((*shelveNew)["comeintime"]) comeintime := utils.Int64All(v["comeintime"]) shelvesTime = shelvesTime + comeintimeNew - comeintime } } Rest["爬虫上架时效"] = (shelvesTime / shelvesCount) / 3600 log.Info("getIndicators", zap.Any("爬虫上架时效", (shelvesTime/shelvesCount)/3600)) } } //5.竞品覆盖率,每周4统计上周的数据 sessC := MgoC.GetMgoConn() defer MgoC.DestoryMongoConn(sessC) if dayOfWeek == time.Thursday { //获取上周四,千里马的招标数据;然后获取标讯前后个3天,一共7天的所有数据,对比看标题或者项目名称是否存在 lastWednesday := time.Date(now.Year(), now.Month(), now.Day()-8, 0, 0, 0, 0, time.Local) //lastThursday := time.Date(now.Year(), now.Month(), now.Day()-7, 0, 0, 0, 0, time.Local) whereQlm := map[string]interface{}{ "publishtime": lastWednesday.Format("2006-01-02"), "site": "千里马", } query := sessC.DB("qlm").C("data_merge").Find(whereQlm).Select(map[string]interface{}{"title": 1, "projectname": 1}).Iter() count := 0 qlmDatas := make([]map[string]interface{}, 0) for tmp := make(map[string]interface{}); query.Next(tmp); count++ { data := map[string]interface{}{ "title": tmp["title"], "projectname": tmp["projectname"], } qlmDatas = append(qlmDatas, data) } log.Info("getIndicators", zap.Int("千里马上周三总数", count)) biddingWhere := map[string]interface{}{ "publishtime": map[string]interface{}{ "$gt": lastWednesday.AddDate(0, 0, -3).Unix(), "$lte": lastWednesday.AddDate(0, 0, 4).Unix(), }, } biddingDatas, _ := MgoB.Find("bidding", biddingWhere, nil, map[string]interface{}{"title": 1, "projectname": 1}, false, -1, -1) log.Info("getIndicators", zap.Int("标讯一周总数", len(*biddingDatas))) // 将切片B中的标题和项目名称分别存储在哈希表中 titlesInB, projectsInB := getUniqueFields(*biddingDatas) matchs := countMatches(qlmDatas, titlesInB, projectsInB) Rest["竞品覆盖率-详情"] = map[string]interface{}{ "date": lastWednesday.Format("2006-01-02"), "count": count, "matchs": matchs, } Rest["竞品覆盖率"] = fmt.Sprintf("%.2f%%", float64(matchs)/float64(count)*100) log.Info("getIndicators", zap.String("竞品覆盖率", fmt.Sprintf("%.2f%%", float64(matchs)/float64(count)*100))) } //6.数据整体流程均耗时(分钟) sessB := MgoB.GetMgoConn() defer MgoB.DestoryMongoConn(sessB) fd := bson.M{"extracttype": 1, "sensitive": 1, "dataging": 1, "site": 1, "infoformat": 1, "comeintime": 1, "pici": 1, "publishtime": 1, "competehref": 1, "attach_text": 1} query := sessB.DB("qfw").C("bidding").Find(whereBidding).Select(fd).Iter() biddingRealCount := 0 comein_publish_totaltime := int64(0) //comeintime 和 生索引 publish 时间 差值的总和 pici_comein_totaltime := int64(0) //publishtime 和 生索引 pici 时间 差值的总和 for tmp := make(map[string]interface{}); query.Next(tmp); { if utils.IntAll(tmp["extracttype"]) != -1 && utils.ObjToString(tmp["sensitive"]) != "测试" && utils.IntAll(tmp["dataging"]) != 1 && utils.Float64All(tmp["infoformat"]) != 3 { comeintime := utils.Int64All(tmp["comeintime"]) publishtime := utils.Int64All(tmp["publishtime"]) pici := utils.Int64All(tmp["pici"]) if (comeintime-publishtime) < 12*60*60 && pici > 0 { biddingRealCount++ diff1 := comeintime - publishtime diff2 := pici - comeintime comein_publish_totaltime += diff1 pici_comein_totaltime += diff2 } } } if biddingRealCount > 0 { comein_publish_avgtime := comein_publish_totaltime / int64(biddingRealCount) pici_comein_avgtime := pici_comein_totaltime / int64(biddingRealCount) Rest["数据整体流程均耗时(分钟)"] = comein_publish_avgtime / 60 Rest["数据处理均耗时(分钟)"] = pici_comein_avgtime / 60 log.Info("getIndicators", zap.Any("数据整体流程均耗时(分钟)", comein_publish_avgtime/60)) log.Info("getIndicators", zap.Any("数据处理均耗时(分钟)", pici_comein_avgtime/60)) } //7.数据行质量合格率,暂时写死 Rest["数据行质量合格率"] = "90%" MgoB.Save("bidding_zhibiao", Rest) }