main.go 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. package main
  2. import (
  3. _ "filter"
  4. "fmt"
  5. "io/ioutil"
  6. "os"
  7. "spider"
  8. mgo "mongodb"
  9. qu "qfw/util"
  10. es "qfw/util/elastic"
  11. mgu "qfw/util/mongodbutil"
  12. "regexp"
  13. "runtime"
  14. . "spiderutil"
  15. "time"
  16. //"qfw/util/redis"
  17. "github.com/donnie4w/go-logger/logger"
  18. "github.com/go-xweb/xweb"
  19. "github.com/yuin/gopher-lua"
  20. )
  21. var timeReg = regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2}")
  22. func init() {
  23. qu.ReadConfig(&Config)
  24. //初始化lua重载函数
  25. lua.Disablelib(Config.Luadisablelib)
  26. //初始化数据库
  27. initConfig(Config.Mongodb_spider, "spider", "spider", Config.Spider_dbsize)
  28. initConfig(Config.Mongodb_editor, "editor", "editor", Config.Editor_dbsize)
  29. mgu.InitMongodbPool()
  30. spider.Mgo = &mgo.MongodbSim{
  31. MongodbAddr: Config.Mongodb_spider,
  32. Size: 50,
  33. DbName: "spider",
  34. }
  35. spider.Mgo.InitPool()
  36. //初始化Redis
  37. InitRedis(Config.Redisservers)
  38. // if Config.Redistype == "0" {
  39. // redis.InitRedis(Config.Redisservers)
  40. // } else { //redis集群
  41. // InitRedisCluster(Config.Redishosts, 20, 100, false)
  42. // }
  43. //初始化es
  44. spider.EsIndex = qu.ObjToString(Config.Es["index"])
  45. spider.EsType = qu.ObjToString(Config.Es["type"])
  46. spider.Es = &es.Elastic{
  47. S_esurl: qu.ObjToString(Config.Es["addr"]),
  48. I_size: qu.IntAll(Config.Es["pool"]),
  49. }
  50. spider.Es.InitElasticSize()
  51. //启动消息服务
  52. spider.InitMsgClient(Config.Msgserveraddr, Config.Msgname)
  53. spider.InitMsgClientFile(Config.MsgserveraddrFile, Config.Msgname+"file")
  54. //初始化网络存储服务
  55. //InitWeedcl()
  56. OssInit(
  57. qu.ObjToString(Config.OssInfo["ossEndpoint"]),
  58. qu.ObjToString(Config.OssInfo["ossAccessKeyId"]),
  59. qu.ObjToString(Config.OssInfo["ossAccessKeySecret"]),
  60. qu.ObjToString(Config.OssInfo["ossBucketName"]),
  61. )
  62. //xweb框架配置
  63. logger.SetConsole(false)
  64. if Config.LogLevel <= 1 {
  65. logger.SetLevel(logger.DEBUG)
  66. } else if Config.LogLevel == 2 {
  67. logger.SetLevel(logger.INFO)
  68. } else {
  69. logger.SetLevel(logger.WARN)
  70. }
  71. logger.SetRollingDaily("./logs", "spider.log")
  72. xweb.Config.RecoverPanic = true
  73. xweb.Config.Profiler = true
  74. xweb.RootApp().AppConfig.TemplateDir = "web/templates"
  75. xweb.RootApp().AppConfig.StaticDir = "web/staticres"
  76. xweb.RootApp().AppConfig.StaticFileVersion = false
  77. xweb.RootApp().AppConfig.CheckXsrf = false
  78. xweb.RootApp().AppConfig.ReloadTemplates = false
  79. xweb.RootApp().AppConfig.EnableHttpCache = false
  80. xweb.RootApp().AppConfig.Mode = xweb.Product
  81. xweb.RootApp().AppConfig.CacheTemplates = false
  82. xweb.AddAction(&spider.Front{})
  83. xweb.RootApp().AppConfig.SessionTimeout = 30 * time.Minute
  84. xweb.RootApp().Logger.SetOutputLevel(4)
  85. }
  86. //初始化数据源
  87. func initConfig(addr, alias, db string, dbsize int) {
  88. conf := *new(mgu.PoolConfig)
  89. conf.Addr = addr
  90. conf.Alias = alias
  91. conf.DB = db
  92. conf.Size = dbsize
  93. mgu.Config = append(mgu.Config, conf)
  94. }
  95. //
  96. func main() {
  97. //定时清理日志
  98. go clearLogs()
  99. //初始化爬虫服务
  100. go spider.InitSpider()
  101. //清理计数
  102. go spider.GcCount()
  103. //定时重载脚本文件
  104. go spider.ReloadSpiderFile()
  105. //爬虫信息提交编辑器
  106. go spider.SpiderInfoSend()
  107. //处理心跳信息
  108. go spider.SaveHeartInfo()
  109. //内存信息
  110. go heapprint()
  111. //查列表页信息采集三级页
  112. go spider.DetailData()
  113. //批量保存错误数据
  114. go spider.UpdateErrDataMgo()
  115. //保存爬虫采集非本站点数据
  116. //go spider.SaveOtherSiteData()
  117. //批量保存心跳信息
  118. go spider.UpdateHeartInfo()
  119. logger.Debug(Config.Webport)
  120. xweb.Run(":" + Config.Webport)
  121. }
  122. func heapprint() {
  123. var m runtime.MemStats
  124. runtime.ReadMemStats(&m)
  125. fmt.Printf("申请内存:%dM,分配内存:%dM,未使用内存:%dM,回收内存:%dM\n", m.HeapSys/(1024*1024), m.HeapAlloc/(1024*1024),
  126. m.HeapIdle/(1024*1024), m.HeapReleased/(1024*1024))
  127. time.AfterFunc(1*time.Minute, heapprint)
  128. }
  129. func clearLogs() {
  130. fmt.Println("=======clearLogs========")
  131. timeInt := time.Now().AddDate(0, 0, -30).Unix()
  132. dirs, err := ioutil.ReadDir("./logs")
  133. if err == nil {
  134. for _, f := range dirs {
  135. fname := f.Name()
  136. logTimeStr := timeReg.FindString(fname)
  137. if logTimeStr == "" {
  138. continue
  139. }
  140. logTimeInt, _ := time.ParseInLocation("2006-01-02", logTimeStr, time.Local)
  141. if logTimeInt.Unix() < timeInt {
  142. os.Remove("./logs/" + fname)
  143. }
  144. }
  145. }
  146. time.AfterFunc(24*time.Hour, clearLogs)
  147. }