123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158 |
- package main
- import (
- _ "filter"
- "fmt"
- "io/ioutil"
- "os"
- "spider"
- mgo "mongodb"
- qu "qfw/util"
- es "qfw/util/elastic"
- mgu "qfw/util/mongodbutil"
- "regexp"
- "runtime"
- . "spiderutil"
- "time"
- //"qfw/util/redis"
- "github.com/donnie4w/go-logger/logger"
- "github.com/go-xweb/xweb"
- "github.com/yuin/gopher-lua"
- )
// timeReg matches a YYYY-MM-DD shaped run of digits (four digits, two digits,
// two digits, separated by hyphens). It only checks the shape, not whether the
// text is a valid calendar date.
var timeReg = regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2}")
- func init() {
- qu.ReadConfig(&Config)
- //初始化lua重载函数
- lua.Disablelib(Config.Luadisablelib)
- //初始化数据库
- initConfig(Config.Mongodb_spider, "spider", "spider", Config.Spider_dbsize)
- initConfig(Config.Mongodb_editor, "editor", "editor", Config.Editor_dbsize)
- mgu.InitMongodbPool()
- spider.Mgo = &mgo.MongodbSim{
- MongodbAddr: Config.Mongodb_spider,
- Size: 50,
- DbName: "spider",
- }
- spider.Mgo.InitPool()
- //初始化Redis
- InitRedis(Config.Redisservers)
- // if Config.Redistype == "0" {
- // redis.InitRedis(Config.Redisservers)
- // } else { //redis集群
- // InitRedisCluster(Config.Redishosts, 20, 100, false)
- // }
- //初始化es
- spider.EsIndex = qu.ObjToString(Config.Es["index"])
- spider.EsType = qu.ObjToString(Config.Es["type"])
- spider.Es = &es.Elastic{
- S_esurl: qu.ObjToString(Config.Es["addr"]),
- I_size: qu.IntAll(Config.Es["pool"]),
- }
- spider.Es.InitElasticSize()
- //启动消息服务
- spider.InitMsgClient(Config.Msgserveraddr, Config.Msgname)
- spider.InitMsgClientFile(Config.MsgserveraddrFile, Config.Msgname+"file")
- //初始化网络存储服务
- //InitWeedcl()
- OssInit(
- qu.ObjToString(Config.OssInfo["ossEndpoint"]),
- qu.ObjToString(Config.OssInfo["ossAccessKeyId"]),
- qu.ObjToString(Config.OssInfo["ossAccessKeySecret"]),
- qu.ObjToString(Config.OssInfo["ossBucketName"]),
- )
- //xweb框架配置
- logger.SetConsole(false)
- if Config.LogLevel <= 1 {
- logger.SetLevel(logger.DEBUG)
- } else if Config.LogLevel == 2 {
- logger.SetLevel(logger.INFO)
- } else {
- logger.SetLevel(logger.WARN)
- }
- logger.SetRollingDaily("./logs", "spider.log")
- xweb.Config.RecoverPanic = true
- xweb.Config.Profiler = true
- xweb.RootApp().AppConfig.TemplateDir = "web/templates"
- xweb.RootApp().AppConfig.StaticDir = "web/staticres"
- xweb.RootApp().AppConfig.StaticFileVersion = false
- xweb.RootApp().AppConfig.CheckXsrf = false
- xweb.RootApp().AppConfig.ReloadTemplates = false
- xweb.RootApp().AppConfig.EnableHttpCache = false
- xweb.RootApp().AppConfig.Mode = xweb.Product
- xweb.RootApp().AppConfig.CacheTemplates = false
- xweb.AddAction(&spider.Front{})
- xweb.RootApp().AppConfig.SessionTimeout = 30 * time.Minute
- xweb.RootApp().Logger.SetOutputLevel(4)
- }
- //初始化数据源
- func initConfig(addr, alias, db string, dbsize int) {
- conf := *new(mgu.PoolConfig)
- conf.Addr = addr
- conf.Alias = alias
- conf.DB = db
- conf.Size = dbsize
- mgu.Config = append(mgu.Config, conf)
- }
- //
- func main() {
- //定时清理日志
- go clearLogs()
- //初始化爬虫服务
- go spider.InitSpider()
- //清理计数
- go spider.GcCount()
- //定时重载脚本文件
- go spider.ReloadSpiderFile()
- //爬虫信息提交编辑器
- go spider.SpiderInfoSend()
- //处理心跳信息
- go spider.SaveHeartInfo()
- //内存信息
- go heapprint()
- //查列表页信息采集三级页
- go spider.DetailData()
- //批量保存错误数据
- go spider.UpdateErrDataMgo()
- //保存爬虫采集非本站点数据
- //go spider.SaveOtherSiteData()
- //批量保存心跳信息
- go spider.UpdateHeartInfo()
- logger.Debug(Config.Webport)
- xweb.Run(":" + Config.Webport)
- }
// heapprint prints the HeapSys, HeapAlloc, HeapIdle and HeapReleased figures
// from runtime.MemStats, each converted to whole mebibytes, and schedules
// itself to run again one minute later.
func heapprint() {
	const mib = 1024 * 1024

	var stats runtime.MemStats
	runtime.ReadMemStats(&stats)
	fmt.Printf("申请内存:%dM,分配内存:%dM,未使用内存:%dM,回收内存:%dM\n",
		stats.HeapSys/mib,
		stats.HeapAlloc/mib,
		stats.HeapIdle/mib,
		stats.HeapReleased/mib,
	)

	time.AfterFunc(time.Minute, heapprint)
}
- func clearLogs() {
- fmt.Println("=======clearLogs========")
- timeInt := time.Now().AddDate(0, 0, -30).Unix()
- dirs, err := ioutil.ReadDir("./logs")
- if err == nil {
- for _, f := range dirs {
- fname := f.Name()
- logTimeStr := timeReg.FindString(fname)
- if logTimeStr == "" {
- continue
- }
- logTimeInt, _ := time.ParseInLocation("2006-01-02", logTimeStr, time.Local)
- if logTimeInt.Unix() < timeInt {
- os.Remove("./logs/" + fname)
- }
- }
- }
- time.AfterFunc(24*time.Hour, clearLogs)
- }
|