random.go 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. package timetask
  import (
  	"sort"
  	"strings"
  	"time"

  	qu "qfw/util"
  	"util"
  )
  // WarnInfo is the per-href accumulator that GetSpiderWarnData fills while
  // merging spider_warn records before writing them to spider_warn_err.
  type WarnInfo struct {
  	// Fields is the set of field names reported for the href; the keys are
  	// what matters, every stored value is true.
  	Fields map[string]bool
  	// MaxLevel is the highest warning level seen among the merged records.
  	MaxLevel int
  	// The remaining values are taken from the first record seen for the href
  	// and are passed through to the saved document without conversion
  	// (Href holds the href already converted to a string).
  	Data, Site, Channel, Title, Info, Code, Href interface{}
  }
  19. func GetSpiderWarnData() {
  20. defer qu.Catch()
  21. qu.Debug("准备spider_warn_err数据")
  22. stime := util.GetTime(-1)
  23. etime := util.GetTime(0)
  24. if time.Now().Weekday().String() == "Monday" {
  25. stime = util.GetTime(-3)
  26. }
  27. query := map[string]interface{}{
  28. "comeintime": map[string]interface{}{
  29. "$gte": stime,
  30. "$lt": etime,
  31. },
  32. "info": map[string]interface{}{
  33. "$in": []string{"Field Value Contains Random Code", "Publishtime Is Too Early", "Publishtime Is Too Late", "Field Value Not Contains Chinese"},
  34. },
  35. }
  36. tmp := map[string]*WarnInfo{}
  37. list, _ := util.MgoS.Find("spider_warn", query, nil, nil, false, -1, -1)
  38. qu.Debug("query:", query, len(*list))
  39. for _, l := range *list {
  40. href := qu.ObjToString(l["href"])
  41. level := qu.IntAll(l["level"])
  42. field := qu.ObjToString(l["field"])
  43. if field == "publishtime" { //特殊处理publishtime字段的level(保存服务中publishtime异常数据入bidding库,level不能为2)
  44. level = 1
  45. }
  46. if warnInfo := tmp[href]; warnInfo == nil {
  47. warnInfo = &WarnInfo{
  48. Fields: map[string]bool{field: true},
  49. MaxLevel: level,
  50. Data: l["data"],
  51. Site: l["site"],
  52. Channel: l["channel"],
  53. Title: l["title"],
  54. Info: l["info"],
  55. Code: l["code"],
  56. Href: href,
  57. }
  58. tmp[href] = warnInfo
  59. } else {
  60. warnInfo.Fields[field] = true
  61. if warnInfo.MaxLevel < level {
  62. warnInfo.MaxLevel = level
  63. }
  64. }
  65. }
  66. for _, wi := range tmp {
  67. fields := []string{}
  68. for f, _ := range wi.Fields {
  69. fields = append(fields, f)
  70. }
  71. util.MgoS.Save("spider_warn_err", map[string]interface{}{
  72. "field": strings.Join(fields, ","),
  73. "level": wi.MaxLevel,
  74. "site": wi.Site,
  75. "channel": wi.Channel,
  76. "title": wi.Title,
  77. "comeintime": time.Now().Unix(),
  78. "info": wi.Info,
  79. "code": wi.Code,
  80. "href": wi.Href,
  81. "data": wi.Data,
  82. "ok": false,
  83. })
  84. }
  85. }
  86. /*
  87. 每天定时推送含乱码数据
  88. */
  89. // var (
  90. // RandomDataPushCron string
  91. // Gmail *gm.GmailAuth
  92. // To string
  93. // )
  94. // type FileWrite struct {
  95. // Byte *bytes.Buffer
  96. // }
  97. // func (fw *FileWrite) Write(p []byte) (n int, err error) {
  98. // n, err = fw.Byte.Write(p)
  99. // return
  100. // }
  101. //PushRandomData 推送乱码数据
  102. // func PushRandomData() {
  103. // defer qu.Catch()
  104. // query := map[string]interface{}{
  105. // //"comeintime": map[string]interface{}{
  106. // // "$gte": GetTime(-1),
  107. // // "$lt": GetTime(0),
  108. // //},
  109. // "info": map[string]interface{}{
  110. // "$in": []string{"Field Value Not Contains Chinese"},
  111. // },
  112. // }
  113. // list, _ := MgoS.Find("spider_warn", query, nil, nil, false, -1, -1)
  114. // if len(*list) > 0 {
  115. // file := xlsx.NewFile()
  116. // sheet, _ := file.AddSheet("乱码数据")
  117. // row := sheet.AddRow()
  118. // row.AddCell().SetValue("站点")
  119. // row.AddCell().SetValue("栏目")
  120. // row.AddCell().SetValue("爬虫")
  121. // row.AddCell().SetValue("字段")
  122. // row.AddCell().SetValue("异常等级")
  123. // row.AddCell().SetValue("标题")
  124. // row.AddCell().SetValue("链接")
  125. // for _, l := range *list {
  126. // textRow := sheet.AddRow()
  127. // textRow.AddCell().SetValue(qu.ObjToString(l["site"]))
  128. // textRow.AddCell().SetValue(qu.ObjToString(l["channel"]))
  129. // textRow.AddCell().SetValue(qu.ObjToString(l["code"]))
  130. // textRow.AddCell().SetValue(qu.ObjToString(l["field"]))
  131. // level := qu.IntAll(l["level"])
  132. // if level == 1 {
  133. // textRow.AddCell().SetValue("警告")
  134. // } else if level == 2 {
  135. // textRow.AddCell().SetValue("错误")
  136. // }
  137. // textRow.AddCell().SetValue(qu.ObjToString(l["title"]))
  138. // textRow.AddCell().SetValue(qu.ObjToString(l["href"]))
  139. // }
  140. // fw := &FileWrite{
  141. // Byte: &bytes.Buffer{},
  142. // }
  143. // file.Write(fw)
  144. // bt := fw.Byte.Bytes()
  145. // gm.GSendMail_Bq("jy@jianyu360.cn", To, "", "", "乱码数据统计", "", "统计报表.xlsx", bt, Gmail)
  146. // }
  147. // }