package timetask

import (
	"sort"
	"strings"
	"time"

	qu "qfw/util"
	"util"
)

// WarnInfo accumulates every spider_warn record that shares one href.
//
// Fields is the set of field names reported for the href and MaxLevel is the
// highest level seen among its records. The remaining members are copied
// verbatim from the first record encountered for that href.
type WarnInfo struct {
	Fields   map[string]bool
	MaxLevel int
	Data     interface{}
	Site     interface{}
	Channel  interface{}
	Title    interface{}
	Info     interface{}
	Code     interface{}
	Href     interface{}
}

// GetSpiderWarnData reads the recent records of the "spider_warn" collection
// whose info is one of four known warning kinds, merges them per href and
// saves one document per href into "spider_warn_err".
//
// The window is [util.GetTime(-1), util.GetTime(0)); on Mondays the lower
// bound becomes util.GetTime(-3).
// NOTE(review): util.GetTime presumably returns a day-boundary timestamp
// offset by the given number of days, and the Monday case presumably covers
// the weekend — confirm against util.GetTime.
//
// Panics are recovered by qu.Catch, so the function never propagates one.
func GetSpiderWarnData() {
	defer qu.Catch()
	qu.Debug("准备spider_warn_err数据")

	stime := util.GetTime(-1)
	etime := util.GetTime(0)
	if time.Now().Weekday() == time.Monday {
		stime = util.GetTime(-3)
	}
	query := map[string]interface{}{
		"comeintime": map[string]interface{}{
			"$gte": stime,
			"$lt":  etime,
		},
		"info": map[string]interface{}{
			"$in": []string{
				"Field Value Contains Random Code",
				"Publishtime Is Too Early",
				"Publishtime Is Too Late",
				"Field Value Not Contains Chinese",
			},
		},
	}

	// The second return value of Find is discarded as before; guard the
	// pointer instead so a failed query ends the job with a log line rather
	// than a nil dereference.
	list, _ := util.MgoS.Find("spider_warn", query, nil, nil, false, -1, -1)
	if list == nil {
		qu.Debug("query:", query, "spider_warn find returned nil")
		return
	}
	qu.Debug("query:", query, len(*list))

	// Merge the records per href.
	tmp := map[string]*WarnInfo{}
	for _, l := range *list {
		href := qu.ObjToString(l["href"])
		level := qu.IntAll(l["level"])
		field := qu.ObjToString(l["field"])
		if field == "publishtime" {
			// Special case for the publishtime field: the save service still
			// stores records with an abnormal publishtime in the bidding
			// collection, so their level must not be 2.
			level = 1
		}
		wi, seen := tmp[href]
		if !seen {
			tmp[href] = &WarnInfo{
				Fields:   map[string]bool{field: true},
				MaxLevel: level,
				Data:     l["data"],
				Site:     l["site"],
				Channel:  l["channel"],
				Title:    l["title"],
				Info:     l["info"],
				Code:     l["code"],
				Href:     href,
			}
			continue
		}
		wi.Fields[field] = true
		if wi.MaxLevel < level {
			wi.MaxLevel = level
		}
	}

	// Persist one document per href.
	for _, wi := range tmp {
		fields := make([]string, 0, len(wi.Fields))
		for f := range wi.Fields {
			fields = append(fields, f)
		}
		// Map iteration order is random; sort so the stored "field" string
		// is reproducible for the same input.
		sort.Strings(fields)
		util.MgoS.Save("spider_warn_err", map[string]interface{}{
			"field":      strings.Join(fields, ","),
			"level":      wi.MaxLevel,
			"site":       wi.Site,
			"channel":    wi.Channel,
			"title":      wi.Title,
			"comeintime": time.Now().Unix(),
			"info":       wi.Info,
			"code":       wi.Code,
			"href":       wi.Href,
			"data":       wi.Data,
			"ok":         false,
		})
	}
}

/*
	Daily scheduled push of records that contain garbled text.
	The code below is disabled and kept for reference only.
*/
// var (
// 	RandomDataPushCron string
// 	Gmail              *gm.GmailAuth
// 	To                 string
// )

// type FileWrite struct {
// 	Byte *bytes.Buffer
// }

// func (fw *FileWrite) Write(p []byte) (n int, err error) {
// 	n, err = fw.Byte.Write(p)
// 	return
// }

// PushRandomData pushes the garbled-text records.
// func PushRandomData() {
// 	defer qu.Catch()
// 	query := map[string]interface{}{
// 		//"comeintime": map[string]interface{}{
// 		//	"$gte": GetTime(-1),
// 		//	"$lt":  GetTime(0),
// 		//},
// 		"info": map[string]interface{}{
// 			"$in": []string{"Field Value Not Contains Chinese"},
// 		},
// 	}
// 	list, _ := MgoS.Find("spider_warn", query, nil, nil, false, -1, -1)
// 	if len(*list) > 0 {
// 		file := xlsx.NewFile()
// 		sheet, _ := file.AddSheet("乱码数据")
// 		row := sheet.AddRow()
// 		row.AddCell().SetValue("站点")
// 		row.AddCell().SetValue("栏目")
// 		row.AddCell().SetValue("爬虫")
// 		row.AddCell().SetValue("字段")
// 		row.AddCell().SetValue("异常等级")
// 		row.AddCell().SetValue("标题")
// 		row.AddCell().SetValue("链接")
// 		for _, l := range *list {
// 			textRow := sheet.AddRow()
// 			textRow.AddCell().SetValue(qu.ObjToString(l["site"]))
// 			textRow.AddCell().SetValue(qu.ObjToString(l["channel"]))
// 			textRow.AddCell().SetValue(qu.ObjToString(l["code"]))
// 			textRow.AddCell().SetValue(qu.ObjToString(l["field"]))
// 			level := qu.IntAll(l["level"])
// 			if level == 1 {
// 				textRow.AddCell().SetValue("警告")
// 			} else if level == 2 {
// 				textRow.AddCell().SetValue("错误")
// 			}
// 			textRow.AddCell().SetValue(qu.ObjToString(l["title"]))
// 			textRow.AddCell().SetValue(qu.ObjToString(l["href"]))
// 		}
// 		fw := &FileWrite{
// 			Byte: &bytes.Buffer{},
// 		}
// 		file.Write(fw)
// 		bt := fw.Byte.Bytes()
// 		gm.GSendMail_Bq("jy@jianyu360.cn", To, "", "", "乱码数据统计", "", "统计报表.xlsx", bt, Gmail)
// 	}
// }