// Entry point of the spider service: init loads the configuration and connects the backing services, main starts the background workers and the xweb server.
- package main
- import (
- codegrpc "analysiscode/client"
- _ "filter"
- "fmt"
- gojs "gorunjs/client"
- "io/ioutil"
- "os"
- "spider"
- mgo "mongodb"
- qu "qfw/util"
- es "qfw/util/elastic.v7"
- "regexp"
- "runtime"
- . "spiderutil"
- "time"
- //"qfw/util/redis"
- "github.com/donnie4w/go-logger/logger"
- "github.com/go-xweb/xweb"
- "github.com/yuin/gopher-lua"
- )
// timeReg matches a YYYY-MM-DD shaped run of digits; clearLogs uses it to pull
// the date out of a log file name. It only checks the shape, not whether the
// digits form a valid calendar date.
var timeReg = regexp.MustCompile(`[0-9]{4}-[0-9]{2}-[0-9]{2}`)
- func init() {
- qu.ReadConfig(&Config)
- //初始化lua重载函数
- lua.Disablelib(Config.Luadisablelib)
- //初始化数据库
- InitMgo()
- spider.InitOther() //加载其他信息
- //验证码识别client
- codegrpc.InitCodeGrpcClient()
- //go执行js服务
- gojs.InitGoRunJsClient()
- InitRedisClient(Config.Redisservers) //初始化Redis
- InitBloomRedisClient(Config.BloomRedisservers) //初始化Bloom Redis
- //初始化es
- spider.EsIndex = qu.ObjToString(Config.Es["index"])
- spider.EsType = qu.ObjToString(Config.Es["type"])
- spider.Es = &es.Elastic{
- S_esurl: qu.ObjToString(Config.Es["addr"]),
- I_size: qu.IntAll(Config.Es["pool"]),
- }
- spider.Es.InitElasticSize()
- //启动消息服务
- spider.InitMsgClient(Config.Msgserveraddr, Config.Msgname)
- spider.InitMsgClientFile(Config.MsgserveraddrFile, Config.Msgname+"file")
- spider.InitMsgClientChromedp(Config.MsgserveraddrChromedp, Config.Msgname+"chromedp")
- //初始化网络存储服务
- //InitWeedcl()
- OssInit(
- qu.ObjToString(Config.OssInfo["ossEndpoint"]),
- qu.ObjToString(Config.OssInfo["ossAccessKeyId"]),
- qu.ObjToString(Config.OssInfo["ossAccessKeySecret"]),
- qu.ObjToString(Config.OssInfo["ossBucketName"]),
- )
- //xweb框架配置
- logger.SetConsole(false)
- if Config.LogLevel <= 1 {
- logger.SetLevel(logger.DEBUG)
- } else if Config.LogLevel == 2 {
- logger.SetLevel(logger.INFO)
- } else {
- logger.SetLevel(logger.WARN)
- }
- logger.SetRollingDaily("./logs", "spider.log")
- xweb.Config.RecoverPanic = true
- xweb.Config.Profiler = true
- xweb.RootApp().AppConfig.TemplateDir = "web/templates"
- xweb.RootApp().AppConfig.StaticDir = "web/staticres"
- xweb.RootApp().AppConfig.StaticFileVersion = false
- xweb.RootApp().AppConfig.CheckXsrf = false
- xweb.RootApp().AppConfig.ReloadTemplates = false
- xweb.RootApp().AppConfig.EnableHttpCache = false
- xweb.RootApp().AppConfig.Mode = xweb.Product
- xweb.RootApp().AppConfig.CacheTemplates = false
- xweb.AddAction(&spider.Front{})
- xweb.RootApp().AppConfig.SessionTimeout = 30 * time.Minute
- xweb.RootApp().Logger.SetOutputLevel(4)
- }
- //初始化数据源
- func InitMgo() {
- spider.MgoS = &mgo.MongodbSim{
- MongodbAddr: Config.Mongodb_spider,
- Size: Config.Spider_dbsize,
- DbName: "spider",
- }
- spider.MgoS.InitPool()
- //spider.MgoE = &mgo.MongodbSim{
- // MongodbAddr: Config.Mongodb_editor,
- // Size: Config.Editor_dbsize,
- // DbName: "editor",
- //}
- //spider.MgoE.InitPool()
- spider.MgoEB = &mgo.MongodbSim{
- MongodbAddr: Config.BidEditor.Addr,
- Size: Config.BidEditor.Size,
- DbName: Config.BidEditor.Db,
- UserName: Config.BidEditor.Username,
- Password: Config.BidEditor.Password,
- }
- spider.MgoEB.InitPool()
- }
- //
- func main() {
- //临时统计总的线程数
- go spider.AllThreadLog()
- //定时上传流量信息
- //go spider.TimeTask()
- //定时清理日志
- go clearLogs()
- //初始化爬虫服务
- go spider.InitSpider()
- //清理计数
- go spider.GcCount()
- //定时重载脚本文件
- go spider.ReloadSpiderFile()
- //内存信息
- go heapprint()
- //查列表页信息采集三级页
- go spider.DetailData()
- //处理心跳信息
- go spider.SaveHeartInfo()
- //批量保存心跳信息
- go spider.UpdateHeartInfo()
- //7000历史节点下载详情页
- go spider.HistoryEventDownloadDetail()
- //批量保存data_bak
- //go spider.SaveDataBak()
- //批量保存错误数据
- //go spider.UpdateErrDataMgo()
- //爬虫信息提交编辑器
- //go spider.SpiderInfoSend()
- //保存爬虫采集非本站点数据
- //go spider.SaveOtherSiteData()
- logger.Debug(Config.Webport)
- xweb.Run(":" + Config.Webport)
- }
// heapprint prints the current heap statistics (HeapSys, HeapAlloc, HeapIdle
// and HeapReleased, each in whole MiB) to standard output and schedules itself
// to run again in one minute.
func heapprint() {
	const mib = 1024 * 1024

	stats := new(runtime.MemStats)
	runtime.ReadMemStats(stats)

	sys, alloc := stats.HeapSys/mib, stats.HeapAlloc/mib
	idle, released := stats.HeapIdle/mib, stats.HeapReleased/mib
	fmt.Printf("申请内存:%dM,分配内存:%dM,未使用内存:%dM,回收内存:%dM\n", sys, alloc, idle, released)

	// Re-arm the timer so the report repeats every minute.
	time.AfterFunc(time.Minute, heapprint)
}
- func clearLogs() {
- fmt.Println("=======clearLogs========")
- timeInt := time.Now().AddDate(0, 0, -30).Unix()
- dirs, err := ioutil.ReadDir("./logs")
- if err == nil {
- for _, f := range dirs {
- fname := f.Name()
- logTimeStr := timeReg.FindString(fname)
- if logTimeStr == "" {
- continue
- }
- logTimeInt, _ := time.ParseInLocation("2006-01-02", logTimeStr, time.Local)
- if logTimeInt.Unix() < timeInt {
- os.Remove("./logs/" + fname)
- }
- }
- }
- time.AfterFunc(24*time.Hour, clearLogs)
- }
|