package main import ( codegrpc "analysiscode/client" _ "filter" "fmt" gojs "gorunjs/client" "io/ioutil" "os" "spider" mgo "mongodb" qu "qfw/util" es "qfw/util/elastic.v7" "regexp" "runtime" . "spiderutil" "time" //"qfw/util/redis" "github.com/donnie4w/go-logger/logger" "github.com/go-xweb/xweb" "github.com/yuin/gopher-lua" ) var timeReg = regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2}") func init() { qu.ReadConfig(&Config) //初始化lua重载函数 lua.Disablelib(Config.Luadisablelib) //初始化数据库 InitMgo() spider.InitOther() //加载其他信息 //验证码识别client codegrpc.InitCodeGrpcClient() //go执行js服务 gojs.InitGoRunJsClient() InitRedisClient(Config.Redisservers) //初始化Redis InitBloomRedisClient(Config.BloomRedisservers) //初始化Bloom Redis //初始化es spider.EsIndex = qu.ObjToString(Config.Es["index"]) spider.EsType = qu.ObjToString(Config.Es["type"]) spider.Es = &es.Elastic{ S_esurl: qu.ObjToString(Config.Es["addr"]), I_size: qu.IntAll(Config.Es["pool"]), } spider.Es.InitElasticSize() //启动消息服务 spider.InitMsgClient(Config.Msgserveraddr, Config.Msgname) spider.InitMsgClientFile(Config.MsgserveraddrFile, Config.Msgname+"file") spider.InitMsgClientChromedp(Config.MsgserveraddrChromedp, Config.Msgname+"chromedp") //初始化网络存储服务 //InitWeedcl() OssInit( qu.ObjToString(Config.OssInfo["ossEndpoint"]), qu.ObjToString(Config.OssInfo["ossAccessKeyId"]), qu.ObjToString(Config.OssInfo["ossAccessKeySecret"]), qu.ObjToString(Config.OssInfo["ossBucketName"]), ) //xweb框架配置 logger.SetConsole(false) if Config.LogLevel <= 1 { logger.SetLevel(logger.DEBUG) } else if Config.LogLevel == 2 { logger.SetLevel(logger.INFO) } else { logger.SetLevel(logger.WARN) } logger.SetRollingDaily("./logs", "spider.log") xweb.Config.RecoverPanic = true xweb.Config.Profiler = true xweb.RootApp().AppConfig.TemplateDir = "web/templates" xweb.RootApp().AppConfig.StaticDir = "web/staticres" xweb.RootApp().AppConfig.StaticFileVersion = false xweb.RootApp().AppConfig.CheckXsrf = false xweb.RootApp().AppConfig.ReloadTemplates = false xweb.RootApp().AppConfig.EnableHttpCache = false xweb.RootApp().AppConfig.Mode = xweb.Product xweb.RootApp().AppConfig.CacheTemplates = false xweb.AddAction(&spider.Front{}) xweb.RootApp().AppConfig.SessionTimeout = 30 * time.Minute xweb.RootApp().Logger.SetOutputLevel(4) } //初始化数据源 func InitMgo() { spider.MgoS = &mgo.MongodbSim{ MongodbAddr: Config.Mongodb_spider, Size: Config.Spider_dbsize, DbName: "spider", } spider.MgoS.InitPool() //spider.MgoE = &mgo.MongodbSim{ // MongodbAddr: Config.Mongodb_editor, // Size: Config.Editor_dbsize, // DbName: "editor", //} //spider.MgoE.InitPool() spider.MgoEB = &mgo.MongodbSim{ MongodbAddr: Config.BidEditor.Addr, Size: Config.BidEditor.Size, DbName: Config.BidEditor.Db, UserName: Config.BidEditor.Username, Password: Config.BidEditor.Password, } spider.MgoEB.InitPool() } // func main() { //临时统计总的线程数 go spider.AllThreadLog() //定时上传流量信息 //go spider.TimeTask() //定时清理日志 go clearLogs() //初始化爬虫服务 go spider.InitSpider() //清理计数 go spider.GcCount() //定时重载脚本文件 go spider.ReloadSpiderFile() //内存信息 go heapprint() //查列表页信息采集三级页 go spider.DetailData() //处理心跳信息 go spider.SaveHeartInfo() //批量保存心跳信息 go spider.UpdateHeartInfo() //7000历史节点下载详情页 go spider.HistoryEventDownloadDetail() //批量保存data_bak //go spider.SaveDataBak() //批量保存错误数据 //go spider.UpdateErrDataMgo() //爬虫信息提交编辑器 //go spider.SpiderInfoSend() //保存爬虫采集非本站点数据 //go spider.SaveOtherSiteData() logger.Debug(Config.Webport) xweb.Run(":" + Config.Webport) } func heapprint() { var m runtime.MemStats runtime.ReadMemStats(&m) fmt.Printf("申请内存:%dM,分配内存:%dM,未使用内存:%dM,回收内存:%dM\n", m.HeapSys/(1024*1024), m.HeapAlloc/(1024*1024), m.HeapIdle/(1024*1024), m.HeapReleased/(1024*1024)) time.AfterFunc(1*time.Minute, heapprint) } func clearLogs() { fmt.Println("=======clearLogs========") timeInt := time.Now().AddDate(0, 0, -30).Unix() dirs, err := ioutil.ReadDir("./logs") if err == nil { for _, f := range dirs { fname := f.Name() logTimeStr := timeReg.FindString(fname) if logTimeStr == "" { continue } logTimeInt, _ := time.ParseInLocation("2006-01-02", logTimeStr, time.Local) if logTimeInt.Unix() < timeInt { os.Remove("./logs/" + fname) } } } time.AfterFunc(24*time.Hour, clearLogs) }