main.go 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. package main
  2. import (
  3. "fmt"
  4. "io/ioutil"
  5. mgo "mongodb"
  6. "os"
  7. qu "qfw/util"
  8. "regexp"
  9. "time"
  10. "github.com/cron"
  11. "github.com/donnie4w/go-logger/logger"
  12. )
  13. var timeReg = regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2}")
  14. var (
  15. Config map[string]interface{}
  16. //User map[string]string
  17. MgoE *mgo.MongodbSim //editor
  18. MgoS *mgo.MongodbSim //spider
  19. )
  20. func init() {
  21. qu.ReadConfig(&Config)
  22. //qu.ReadConfig("./user.json", &User)
  23. //mgo
  24. spider := Config["spider"].(map[string]interface{})
  25. MgoS = &mgo.MongodbSim{
  26. MongodbAddr: qu.ObjToString(spider["addr"]),
  27. DbName: qu.ObjToString(spider["db"]),
  28. Size: qu.IntAll(spider["size"]),
  29. }
  30. MgoS.InitPool()
  31. editor := Config["editor"].(map[string]interface{})
  32. MgoE = &mgo.MongodbSim{
  33. MongodbAddr: qu.ObjToString(editor["addr"]),
  34. DbName: qu.ObjToString(editor["db"]),
  35. Size: qu.IntAll(editor["size"]),
  36. }
  37. MgoE.InitPool()
  38. //logs
  39. logger.SetRollingDaily("./logs", "task.log")
  40. //爬虫上下浮动率
  41. DownloadCheck = make(map[string]*DC)
  42. downloadcheck := Config["downloadcheck"].(map[string]interface{})
  43. for _, tmp := range downloadcheck {
  44. tmpMap := tmp.(map[string]interface{})
  45. downratio := qu.Float64All(tmpMap["downratio"])
  46. uptatio := qu.Float64All(tmpMap["upratio"])
  47. codes := tmpMap["spidercode"].([]interface{})
  48. if len(codes) > 0 {
  49. for _, code := range codes {
  50. c := qu.ObjToString(code)
  51. DownloadCheck[c] = &DC{
  52. DownRatio: 1.0 - downratio/100.0,
  53. UpRatio: 1.0 + uptatio/100.0,
  54. }
  55. }
  56. } else {
  57. DownloadCheck["other"] = &DC{
  58. DownRatio: 1.0 - downratio/100.0,
  59. UpRatio: 1.0 + uptatio/100.0,
  60. }
  61. }
  62. }
  63. StartTaskCron = qu.ObjToString(Config["startaskcron"])
  64. UpdateStateCron = qu.ObjToString(Config["updatestatecron"])
  65. CodeSummaryCron = qu.ObjToString(Config["codesummarycron"])
  66. CloseNum = qu.IntAll(Config["closenum"])
  67. DayNum = qu.IntAll(Config["daynum"])
  68. //
  69. YearMinDownloadNum = qu.IntAll(Config["yearmindownload"])
  70. IntervalMaxNum = qu.IntAll(Config["intervalmaxnum"])
  71. }
  72. func main() {
  73. go clearLogs()
  74. //低采集量爬虫新建任务
  75. LuaYearMinCodeCreateTask()
  76. c := cron.New()
  77. c.Start()
  78. c.AddFunc(StartTaskCron, StartTask) //开始任务
  79. //c.AddFunc(CodeSummaryCron, SummaryCode) //上架爬虫信息汇总
  80. //c.AddFunc(UpdateStateCron, ResetDataState) //更新数据状态
  81. //统计爬虫历史下载量制定任务周期
  82. // GetSpidercode()
  83. // TagCode()
  84. // CycleTime()
  85. ch := make(chan bool, 1)
  86. <-ch
  87. }
  88. func clearLogs() {
  89. fmt.Println("=======clearLogs========")
  90. timeInt := time.Now().AddDate(0, 0, -30).Unix()
  91. dirs, err := ioutil.ReadDir("./logs")
  92. if err == nil {
  93. for _, f := range dirs {
  94. fname := f.Name()
  95. logTimeStr := timeReg.FindString(fname)
  96. if logTimeStr == "" {
  97. continue
  98. }
  99. logTimeInt, _ := time.ParseInLocation("2006-01-02", logTimeStr, time.Local)
  100. if logTimeInt.Unix() < timeInt {
  101. os.Remove("./logs/" + fname)
  102. }
  103. }
  104. }
  105. time.AfterFunc(24*time.Hour, clearLogs)
  106. }