package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"regexp"
	"time"

	"github.com/cron"
	"github.com/donnie4w/go-logger/logger"
	mgo "mongodb"
	qu "qfw/util"
)

// timeReg extracts a yyyy-mm-dd date embedded in a log file name.
var timeReg = regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2}")

var (
	Config map[string]interface{} // parsed application configuration
	//User map[string]string
	MgoE *mgo.MongodbSim // editor mongodb connection pool
	MgoS *mgo.MongodbSim // spider mongodb connection pool
)

// init loads the configuration, initializes the spider and editor
// mongodb connection pools, sets up daily-rolling log output, builds
// the per-spider download fluctuation thresholds (DownloadCheck), and
// reads the cron schedules and numeric limits used by main.
func init() {
	qu.ReadConfig(&Config)
	//qu.ReadConfig("./user.json", &User)

	// mongodb: spider pool
	spider := Config["spider"].(map[string]interface{})
	MgoS = &mgo.MongodbSim{
		MongodbAddr: qu.ObjToString(spider["addr"]),
		DbName:      qu.ObjToString(spider["db"]),
		Size:        qu.IntAll(spider["size"]),
	}
	MgoS.InitPool()

	// mongodb: editor pool
	editor := Config["editor"].(map[string]interface{})
	MgoE = &mgo.MongodbSim{
		MongodbAddr: qu.ObjToString(editor["addr"]),
		DbName:      qu.ObjToString(editor["db"]),
		Size:        qu.IntAll(editor["size"]),
	}
	MgoE.InitPool()

	// logs: roll the task log daily
	logger.SetRollingDaily("./logs", "task.log")

	// Allowed download fluctuation ratios per spider code.
	// Each config entry carries percentage bounds; they are converted to
	// multiplicative factors (e.g. downratio 20 -> 0.8, upratio 20 -> 1.2).
	DownloadCheck = make(map[string]*DC)
	downloadcheck := Config["downloadcheck"].(map[string]interface{})
	for _, tmp := range downloadcheck {
		tmpMap := tmp.(map[string]interface{})
		downRatio := qu.Float64All(tmpMap["downratio"])
		upRatio := qu.Float64All(tmpMap["upratio"])
		codes := tmpMap["spidercode"].([]interface{})
		if len(codes) > 0 {
			for _, code := range codes {
				c := qu.ObjToString(code)
				DownloadCheck[c] = &DC{
					DownRatio: 1.0 - downRatio/100.0,
					UpRatio:   1.0 + upRatio/100.0,
				}
			}
		} else {
			// No explicit codes: these bounds are the fallback for all
			// spiders not listed elsewhere.
			DownloadCheck["other"] = &DC{
				DownRatio: 1.0 - downRatio/100.0,
				UpRatio:   1.0 + upRatio/100.0,
			}
		}
	}

	StartTaskCron = qu.ObjToString(Config["startaskcron"])
	UpdateStateCron = qu.ObjToString(Config["updatestatecron"])
	CodeSummaryCron = qu.ObjToString(Config["codesummarycron"])
	CloseNum = qu.IntAll(Config["closenum"])
	DayNum = qu.IntAll(Config["daynum"])
	// YearMinDownloadNum = qu.IntAll(Config["yearmindownload"])
	IntervalMaxNum = qu.IntAll(Config["intervalmaxnum"])
}

// main starts the background log cleaner, creates tasks for
// low-volume spiders, registers the cron jobs, and then blocks
// forever — all ongoing work happens in cron jobs and goroutines.
func main() {
	go clearLogs()
	// Create tasks for spiders with a low yearly download count.
	LuaYearMinCodeCreateTask()
	c := cron.New()
	c.Start()
	c.AddFunc(StartTaskCron, StartTask) // start scheduled tasks
	//c.AddFunc(CodeSummaryCron, SummaryCode)    // aggregate info on published spiders
	//c.AddFunc(UpdateStateCron, ResetDataState) // update data state
	// Derive task cycles from historical spider download statistics:
	// GetSpidercode()
	// TagCode()
	// CycleTime()

	// Block forever; nothing ever sends on this channel.
	ch := make(chan bool, 1)
	<-ch
}

// clearLogs removes files under ./logs whose embedded yyyy-mm-dd date
// is more than 30 days old, then re-arms itself to run again in 24h.
func clearLogs() {
	fmt.Println("=======clearLogs========")
	cutoff := time.Now().AddDate(0, 0, -30).Unix()
	// NOTE(review): ioutil.ReadDir is deprecated; prefer os.ReadDir once
	// the module is confirmed to build with Go 1.16+.
	dirs, err := ioutil.ReadDir("./logs")
	if err == nil {
		for _, f := range dirs {
			fname := f.Name()
			logTimeStr := timeReg.FindString(fname)
			if logTimeStr == "" {
				continue // no date in the name; never delete
			}
			logTime, err := time.ParseInLocation("2006-01-02", logTimeStr, time.Local)
			if err != nil {
				// Regex shape matched but the date is invalid (e.g.
				// "2023-13-45"); previously the zero time slipped through
				// and the file was wrongly deleted. Skip it instead.
				continue
			}
			if logTime.Unix() < cutoff {
				// Best-effort cleanup; a failed remove is retried next run.
				os.Remove(filepath.Join("./logs", fname))
			}
		}
	}
	time.AfterFunc(24*time.Hour, clearLogs)
}