123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154 |
- package timetask
import (
	"sort"
	"strings"
	"time"

	qu "qfw/util"
	"util"
)
// WarnInfo is the per-href accumulator used while merging spider_warn
// records into a single spider_warn_err document.
type WarnInfo struct {
	// Fields is the set of field names that raised a warning for this href.
	Fields map[string]bool
	// MaxLevel is the highest warning level seen across the merged records.
	MaxLevel int
	// The remaining values are stored untyped, exactly as they were read
	// from the source record.
	Data, Site, Channel, Title, Info, Code, Href interface{}
}
- func GetSpiderWarnData() {
- defer qu.Catch()
- qu.Debug("准备spider_warn_err数据")
- stime := util.GetTime(-1)
- etime := util.GetTime(0)
- if time.Now().Weekday().String() == "Monday" {
- stime = util.GetTime(-3)
- }
- query := map[string]interface{}{
- "comeintime": map[string]interface{}{
- "$gte": stime,
- "$lt": etime,
- },
- "info": map[string]interface{}{
- "$in": []string{"Field Value Contains Random Code", "Publishtime Is Too Early", "Publishtime Is Too Late", "Field Value Not Contains Chinese"},
- },
- }
- tmp := map[string]*WarnInfo{}
- list, _ := util.MgoS.Find("spider_warn", query, nil, nil, false, -1, -1)
- qu.Debug("query:", query, len(*list))
- for _, l := range *list {
- href := qu.ObjToString(l["href"])
- level := qu.IntAll(l["level"])
- field := qu.ObjToString(l["field"])
- if field == "publishtime" { //特殊处理publishtime字段的level(保存服务中publishtime异常数据入bidding库,level不能为2)
- level = 1
- }
- if warnInfo := tmp[href]; warnInfo == nil {
- warnInfo = &WarnInfo{
- Fields: map[string]bool{field: true},
- MaxLevel: level,
- Data: l["data"],
- Site: l["site"],
- Channel: l["channel"],
- Title: l["title"],
- Info: l["info"],
- Code: l["code"],
- Href: href,
- }
- tmp[href] = warnInfo
- } else {
- warnInfo.Fields[field] = true
- if warnInfo.MaxLevel < level {
- warnInfo.MaxLevel = level
- }
- }
- }
- for _, wi := range tmp {
- fields := []string{}
- for f, _ := range wi.Fields {
- fields = append(fields, f)
- }
- util.MgoS.Save("spider_warn_err", map[string]interface{}{
- "field": strings.Join(fields, ","),
- "level": wi.MaxLevel,
- "site": wi.Site,
- "channel": wi.Channel,
- "title": wi.Title,
- "comeintime": time.Now().Unix(),
- "info": wi.Info,
- "code": wi.Code,
- "href": wi.Href,
- "data": wi.Data,
- "ok": false,
- })
- }
- }
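/*
Daily scheduled push of records containing garbled text.
NOTE: the implementation below is currently commented out (disabled).
*/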
- // var (
- // RandomDataPushCron string
- // Gmail *gm.GmailAuth
- // To string
- // )
- // type FileWrite struct {
- // Byte *bytes.Buffer
- // }
- // func (fw *FileWrite) Write(p []byte) (n int, err error) {
- // n, err = fw.Byte.Write(p)
- // return
- // }
- //PushRandomData 推送乱码数据
- // func PushRandomData() {
- // defer qu.Catch()
- // query := map[string]interface{}{
- // //"comeintime": map[string]interface{}{
- // // "$gte": GetTime(-1),
- // // "$lt": GetTime(0),
- // //},
- // "info": map[string]interface{}{
- // "$in": []string{"Field Value Not Contains Chinese"},
- // },
- // }
- // list, _ := MgoS.Find("spider_warn", query, nil, nil, false, -1, -1)
- // if len(*list) > 0 {
- // file := xlsx.NewFile()
- // sheet, _ := file.AddSheet("乱码数据")
- // row := sheet.AddRow()
- // row.AddCell().SetValue("站点")
- // row.AddCell().SetValue("栏目")
- // row.AddCell().SetValue("爬虫")
- // row.AddCell().SetValue("字段")
- // row.AddCell().SetValue("异常等级")
- // row.AddCell().SetValue("标题")
- // row.AddCell().SetValue("链接")
- // for _, l := range *list {
- // textRow := sheet.AddRow()
- // textRow.AddCell().SetValue(qu.ObjToString(l["site"]))
- // textRow.AddCell().SetValue(qu.ObjToString(l["channel"]))
- // textRow.AddCell().SetValue(qu.ObjToString(l["code"]))
- // textRow.AddCell().SetValue(qu.ObjToString(l["field"]))
- // level := qu.IntAll(l["level"])
- // if level == 1 {
- // textRow.AddCell().SetValue("警告")
- // } else if level == 2 {
- // textRow.AddCell().SetValue("错误")
- // }
- // textRow.AddCell().SetValue(qu.ObjToString(l["title"]))
- // textRow.AddCell().SetValue(qu.ObjToString(l["href"]))
- // }
- // fw := &FileWrite{
- // Byte: &bytes.Buffer{},
- // }
- // file.Write(fw)
- // bt := fw.Byte.Bytes()
- // gm.GSendMail_Bq("jy@jianyu360.cn", To, "", "", "乱码数据统计", "", "统计报表.xlsx", bt, Gmail)
- // }
- // }
|