main.go 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. package main
  2. import (
  3. codegrpc "analysiscode/client"
  4. _ "filter"
  5. "fmt"
  6. "io/ioutil"
  7. "os"
  8. "spider"
  9. mgo "mongodb"
  10. qu "qfw/util"
  11. es "qfw/util/elastic"
  12. "regexp"
  13. "runtime"
  14. . "spiderutil"
  15. "time"
  16. //"qfw/util/redis"
  17. "github.com/donnie4w/go-logger/logger"
  18. "github.com/go-xweb/xweb"
  19. "github.com/yuin/gopher-lua"
  20. )
  21. var timeReg = regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2}")
  22. func init() {
  23. qu.ReadConfig(&Config)
  24. //初始化lua重载函数
  25. lua.Disablelib(Config.Luadisablelib)
  26. //初始化数据库
  27. InitMgo()
  28. spider.InitOther() //加载其他信息
  29. //验证码识别client
  30. codegrpc.InitCodeGrpcClient()
  31. //InitRedis(Config.Redisservers) //初始化Redis
  32. //redis集群
  33. InitRedisCluster(Config.RedisClusterAddrs)
  34. //初始化es
  35. spider.EsIndex = qu.ObjToString(Config.Es["index"])
  36. spider.EsType = qu.ObjToString(Config.Es["type"])
  37. spider.Es = &es.Elastic{
  38. S_esurl: qu.ObjToString(Config.Es["addr"]),
  39. I_size: qu.IntAll(Config.Es["pool"]),
  40. }
  41. spider.Es.InitElasticSize()
  42. //启动消息服务
  43. spider.InitMsgClient(Config.Msgserveraddr, Config.Msgname)
  44. spider.InitMsgClientFile(Config.MsgserveraddrFile, Config.Msgname+"file")
  45. //初始化网络存储服务
  46. //InitWeedcl()
  47. OssInit(
  48. qu.ObjToString(Config.OssInfo["ossEndpoint"]),
  49. qu.ObjToString(Config.OssInfo["ossAccessKeyId"]),
  50. qu.ObjToString(Config.OssInfo["ossAccessKeySecret"]),
  51. qu.ObjToString(Config.OssInfo["ossBucketName"]),
  52. )
  53. //xweb框架配置
  54. logger.SetConsole(false)
  55. if Config.LogLevel <= 1 {
  56. logger.SetLevel(logger.DEBUG)
  57. } else if Config.LogLevel == 2 {
  58. logger.SetLevel(logger.INFO)
  59. } else {
  60. logger.SetLevel(logger.WARN)
  61. }
  62. logger.SetRollingDaily("./logs", "spider.log")
  63. xweb.Config.RecoverPanic = true
  64. xweb.Config.Profiler = true
  65. xweb.RootApp().AppConfig.TemplateDir = "web/templates"
  66. xweb.RootApp().AppConfig.StaticDir = "web/staticres"
  67. xweb.RootApp().AppConfig.StaticFileVersion = false
  68. xweb.RootApp().AppConfig.CheckXsrf = false
  69. xweb.RootApp().AppConfig.ReloadTemplates = false
  70. xweb.RootApp().AppConfig.EnableHttpCache = false
  71. xweb.RootApp().AppConfig.Mode = xweb.Product
  72. xweb.RootApp().AppConfig.CacheTemplates = false
  73. xweb.AddAction(&spider.Front{})
  74. xweb.RootApp().AppConfig.SessionTimeout = 30 * time.Minute
  75. xweb.RootApp().Logger.SetOutputLevel(4)
  76. }
  77. //初始化数据源
  78. func InitMgo() {
  79. spider.MgoS = &mgo.MongodbSim{
  80. MongodbAddr: Config.Mongodb_spider,
  81. Size: Config.Spider_dbsize,
  82. DbName: "spider",
  83. }
  84. spider.MgoS.InitPool()
  85. //spider.MgoE = &mgo.MongodbSim{
  86. // MongodbAddr: Config.Mongodb_editor,
  87. // Size: Config.Editor_dbsize,
  88. // DbName: "editor",
  89. //}
  90. //spider.MgoE.InitPool()
  91. spider.MgoEB = &mgo.MongodbSim{
  92. MongodbAddr: Config.BidEditor.Addr,
  93. Size: Config.BidEditor.Size,
  94. DbName: Config.BidEditor.Db,
  95. UserName: Config.BidEditor.Username,
  96. Password: Config.BidEditor.Password,
  97. }
  98. spider.MgoEB.InitPool()
  99. }
  100. //
  101. func main() {
  102. //临时统计总的线程数
  103. go spider.AllThreadLog()
  104. //定时上传流量信息
  105. //go spider.TimeTask()
  106. //定时清理日志
  107. go clearLogs()
  108. //初始化爬虫服务
  109. go spider.InitSpider()
  110. //清理计数
  111. go spider.GcCount()
  112. //定时重载脚本文件
  113. go spider.ReloadSpiderFile()
  114. //内存信息
  115. go heapprint()
  116. //查列表页信息采集三级页
  117. go spider.DetailData()
  118. //批量保存错误数据
  119. go spider.UpdateErrDataMgo()
  120. //处理心跳信息
  121. go spider.SaveHeartInfo()
  122. //批量保存心跳信息
  123. go spider.UpdateHeartInfo()
  124. //7000历史节点下载详情页
  125. go spider.HistoryEventDownloadDetail()
  126. //爬虫信息提交编辑器
  127. //go spider.SpiderInfoSend()
  128. //保存爬虫采集非本站点数据
  129. //go spider.SaveOtherSiteData()
  130. logger.Debug(Config.Webport)
  131. xweb.Run(":" + Config.Webport)
  132. }
  133. func heapprint() {
  134. var m runtime.MemStats
  135. runtime.ReadMemStats(&m)
  136. fmt.Printf("申请内存:%dM,分配内存:%dM,未使用内存:%dM,回收内存:%dM\n", m.HeapSys/(1024*1024), m.HeapAlloc/(1024*1024),
  137. m.HeapIdle/(1024*1024), m.HeapReleased/(1024*1024))
  138. time.AfterFunc(1*time.Minute, heapprint)
  139. }
  140. func clearLogs() {
  141. fmt.Println("=======clearLogs========")
  142. timeInt := time.Now().AddDate(0, 0, -30).Unix()
  143. dirs, err := ioutil.ReadDir("./logs")
  144. if err == nil {
  145. for _, f := range dirs {
  146. fname := f.Name()
  147. logTimeStr := timeReg.FindString(fname)
  148. if logTimeStr == "" {
  149. continue
  150. }
  151. logTimeInt, _ := time.ParseInLocation("2006-01-02", logTimeStr, time.Local)
  152. if logTimeInt.Unix() < timeInt {
  153. os.Remove("./logs/" + fname)
  154. }
  155. }
  156. }
  157. time.AfterFunc(24*time.Hour, clearLogs)
  158. }