main.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. package main
  2. import (
  3. codegrpc "analysiscode/client"
  4. _ "filter"
  5. "fmt"
  6. gojs "gorunjs/client"
  7. "io/ioutil"
  8. "os"
  9. "spider"
  10. mgo "mongodb"
  11. qu "qfw/util"
  12. es "qfw/util/elastic.v7"
  13. "regexp"
  14. "runtime"
  15. . "spiderutil"
  16. "time"
  17. //"qfw/util/redis"
  18. "github.com/donnie4w/go-logger/logger"
  19. "github.com/go-xweb/xweb"
  20. "github.com/yuin/gopher-lua"
  21. )
  22. var timeReg = regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2}")
  23. func init() {
  24. qu.ReadConfig(&Config)
  25. //初始化lua重载函数
  26. lua.Disablelib(Config.Luadisablelib)
  27. //初始化数据库
  28. InitMgo()
  29. spider.InitOther() //加载其他信息
  30. //验证码识别client
  31. codegrpc.InitCodeGrpcClient()
  32. //go执行js服务
  33. gojs.InitGoRunJsClient()
  34. InitRedisClient(Config.Redisservers) //初始化Redis
  35. InitBloomRedisClient(Config.BloomRedisservers) //初始化Bloom Redis
  36. //初始化es
  37. spider.EsIndex = qu.ObjToString(Config.Es["index"])
  38. spider.EsType = qu.ObjToString(Config.Es["type"])
  39. spider.Es = &es.Elastic{
  40. S_esurl: qu.ObjToString(Config.Es["addr"]),
  41. I_size: qu.IntAll(Config.Es["pool"]),
  42. }
  43. spider.Es.InitElasticSize()
  44. //启动消息服务
  45. spider.InitMsgClient(Config.Msgserveraddr, Config.Msgname)
  46. spider.InitMsgClientFile(Config.MsgserveraddrFile, Config.Msgname+"file")
  47. spider.InitMsgClientChromedp(Config.MsgserveraddrChromedp, Config.Msgname+"chromedp")
  48. //初始化网络存储服务
  49. //InitWeedcl()
  50. OssInit(
  51. qu.ObjToString(Config.OssInfo["ossEndpoint"]),
  52. qu.ObjToString(Config.OssInfo["ossAccessKeyId"]),
  53. qu.ObjToString(Config.OssInfo["ossAccessKeySecret"]),
  54. qu.ObjToString(Config.OssInfo["ossBucketName"]),
  55. )
  56. //xweb框架配置
  57. logger.SetConsole(false)
  58. if Config.LogLevel <= 1 {
  59. logger.SetLevel(logger.DEBUG)
  60. } else if Config.LogLevel == 2 {
  61. logger.SetLevel(logger.INFO)
  62. } else {
  63. logger.SetLevel(logger.WARN)
  64. }
  65. logger.SetRollingDaily("./logs", "spider.log")
  66. xweb.Config.RecoverPanic = true
  67. xweb.Config.Profiler = true
  68. xweb.RootApp().AppConfig.TemplateDir = "web/templates"
  69. xweb.RootApp().AppConfig.StaticDir = "web/staticres"
  70. xweb.RootApp().AppConfig.StaticFileVersion = false
  71. xweb.RootApp().AppConfig.CheckXsrf = false
  72. xweb.RootApp().AppConfig.ReloadTemplates = false
  73. xweb.RootApp().AppConfig.EnableHttpCache = false
  74. xweb.RootApp().AppConfig.Mode = xweb.Product
  75. xweb.RootApp().AppConfig.CacheTemplates = false
  76. xweb.AddAction(&spider.Front{})
  77. xweb.RootApp().AppConfig.SessionTimeout = 30 * time.Minute
  78. xweb.RootApp().Logger.SetOutputLevel(4)
  79. }
  80. //初始化数据源
  81. func InitMgo() {
  82. spider.MgoS = &mgo.MongodbSim{
  83. MongodbAddr: Config.Mongodb_spider,
  84. Size: Config.Spider_dbsize,
  85. DbName: "spider",
  86. }
  87. spider.MgoS.InitPool()
  88. //spider.MgoE = &mgo.MongodbSim{
  89. // MongodbAddr: Config.Mongodb_editor,
  90. // Size: Config.Editor_dbsize,
  91. // DbName: "editor",
  92. //}
  93. //spider.MgoE.InitPool()
  94. spider.MgoEB = &mgo.MongodbSim{
  95. MongodbAddr: Config.BidEditor.Addr,
  96. Size: Config.BidEditor.Size,
  97. DbName: Config.BidEditor.Db,
  98. UserName: Config.BidEditor.Username,
  99. Password: Config.BidEditor.Password,
  100. }
  101. spider.MgoEB.InitPool()
  102. }
  103. //
  104. func main() {
  105. //临时统计总的线程数
  106. go spider.AllThreadLog()
  107. //定时上传流量信息
  108. //go spider.TimeTask()
  109. //定时清理日志
  110. go clearLogs()
  111. //初始化爬虫服务
  112. go spider.InitSpider()
  113. //清理计数
  114. go spider.GcCount()
  115. //定时重载脚本文件
  116. go spider.ReloadSpiderFile()
  117. //内存信息
  118. go heapprint()
  119. //查列表页信息采集三级页
  120. go spider.DetailData()
  121. //处理心跳信息
  122. go spider.SaveHeartInfo()
  123. //批量保存心跳信息
  124. go spider.UpdateHeartInfo()
  125. //7000历史节点下载详情页
  126. go spider.HistoryEventDownloadDetail()
  127. //批量保存data_bak
  128. //go spider.SaveDataBak()
  129. //批量保存错误数据
  130. //go spider.UpdateErrDataMgo()
  131. //爬虫信息提交编辑器
  132. //go spider.SpiderInfoSend()
  133. //保存爬虫采集非本站点数据
  134. //go spider.SaveOtherSiteData()
  135. logger.Debug(Config.Webport)
  136. xweb.Run(":" + Config.Webport)
  137. }
  138. func heapprint() {
  139. var m runtime.MemStats
  140. runtime.ReadMemStats(&m)
  141. fmt.Printf("申请内存:%dM,分配内存:%dM,未使用内存:%dM,回收内存:%dM\n", m.HeapSys/(1024*1024), m.HeapAlloc/(1024*1024),
  142. m.HeapIdle/(1024*1024), m.HeapReleased/(1024*1024))
  143. time.AfterFunc(1*time.Minute, heapprint)
  144. }
  145. func clearLogs() {
  146. fmt.Println("=======clearLogs========")
  147. timeInt := time.Now().AddDate(0, 0, -30).Unix()
  148. dirs, err := ioutil.ReadDir("./logs")
  149. if err == nil {
  150. for _, f := range dirs {
  151. fname := f.Name()
  152. logTimeStr := timeReg.FindString(fname)
  153. if logTimeStr == "" {
  154. continue
  155. }
  156. logTimeInt, _ := time.ParseInLocation("2006-01-02", logTimeStr, time.Local)
  157. if logTimeInt.Unix() < timeInt {
  158. os.Remove("./logs/" + fname)
  159. }
  160. }
  161. }
  162. time.AfterFunc(24*time.Hour, clearLogs)
  163. }