|
@@ -0,0 +1,348 @@
|
|
|
+package timetask
|
|
|
+
|
|
|
+import (
|
|
|
+ "bytes"
|
|
|
+ "fmt"
|
|
|
+ "github.com/tealeg/xlsx"
|
|
|
+ qu "qfw/util"
|
|
|
+ gm "qfw/util/mail"
|
|
|
+ "strconv"
|
|
|
+ "sync"
|
|
|
+ "time"
|
|
|
+ "util"
|
|
|
+)
|
|
|
+
|
|
|
+func SpiderWeeklyReport() {
|
|
|
+ defer qu.Catch()
|
|
|
+ eTime := util.GetTime(0)
|
|
|
+ sTime := util.GetTime(-7)
|
|
|
+ qu.Debug(sTime, eTime)
|
|
|
+ //上周统计信息
|
|
|
+ LastWeekAddTaskAllNum, //上周新建任务数
|
|
|
+ LastWeekAddLuaNum, //lua上周新增爬虫数
|
|
|
+ LastWeekAddEffectTaskNum, //上周新增有效任务数
|
|
|
+ LastWeekAddPythonNum, //python上周新增爬虫数
|
|
|
+ LastWeekCheckTaskNum, //上周核实任务数
|
|
|
+ LastWeekFinishLuaNum, //lua上周完成爬虫数
|
|
|
+ LastWeekFinishEffectTaskNum, //上周完成有效任务数
|
|
|
+ LastWeekFinishPythonNum := GetLastWeekReport() //python上周完成爬虫数
|
|
|
+ //1、任务相关
|
|
|
+ //上周新建任务数量
|
|
|
+ qu.Debug("上周新建任务数量:", LastWeekAddTaskAllNum)
|
|
|
+ //本周新建任务数量
|
|
|
+ query := map[string]interface{}{
|
|
|
+ "l_comeintime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ }
|
|
|
+ ThisWeekAddTaskNum := util.MgoE.Count("task", query)
|
|
|
+ qu.Debug("本周新建任务数量:", ThisWeekAddTaskNum)
|
|
|
+ //新建任务同比增减
|
|
|
+ AddTaskIncDecRatio := float64(0)
|
|
|
+ if ThisWeekAddTaskNum != 0 {
|
|
|
+ AddTaskIncDecRatio = float64(ThisWeekAddTaskNum-LastWeekAddTaskAllNum) / float64(ThisWeekAddTaskNum)
|
|
|
+ }
|
|
|
+ AddTaskIncDecRatio, _ = strconv.ParseFloat(fmt.Sprintf("%.4f", AddTaskIncDecRatio), 64)
|
|
|
+ qu.Debug("新建任务同比增减:", AddTaskIncDecRatio)
|
|
|
+ //上周核实任务数量
|
|
|
+ qu.Debug("上周核实任务数量:", LastWeekCheckTaskNum)
|
|
|
+ //本周核实任务数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "l_checktime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ "i_state": map[string]interface{}{
|
|
|
+ "$gte": 2, //任务状态:处理中、待审核、审核通过、未通过、关闭
|
|
|
+ },
|
|
|
+ }
|
|
|
+ ThisWeekCheckTaskNum := util.MgoE.Count("task", query)
|
|
|
+ qu.Debug("本周核实任务数量:", ThisWeekCheckTaskNum)
|
|
|
+ //核实任务同比增减
|
|
|
+ CheckTaskIncDecRatio := float64(0)
|
|
|
+ if ThisWeekCheckTaskNum != 0 {
|
|
|
+ CheckTaskIncDecRatio = float64(ThisWeekCheckTaskNum-LastWeekCheckTaskNum) / float64(ThisWeekCheckTaskNum)
|
|
|
+ }
|
|
|
+ CheckTaskIncDecRatio, _ = strconv.ParseFloat(fmt.Sprintf("%.4f", CheckTaskIncDecRatio), 64)
|
|
|
+ qu.Debug("核实任务同比增减:", CheckTaskIncDecRatio)
|
|
|
+ //待核实任务总数
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "i_state": map[string]interface{}{
|
|
|
+ "$lte": 1, //任务状态:待确认、待处理
|
|
|
+ },
|
|
|
+ }
|
|
|
+ NeedToCheckTaskAllNum := util.MgoE.Count("task", query)
|
|
|
+ qu.Debug("待核实任务总数:", NeedToCheckTaskAllNum)
|
|
|
+
|
|
|
+ //2、lua新增爬虫
|
|
|
+ //上周新建爬虫数量
|
|
|
+ qu.Debug("lua上周新建爬虫数量:", LastWeekAddLuaNum)
|
|
|
+ //本周新建爬虫数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "comeintime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ "platform": "golua平台",
|
|
|
+ }
|
|
|
+ ThisWeekAddLuaNum := util.MgoEB.Count("luaconfig", query)
|
|
|
+ qu.Debug("lua本周新建爬虫数量:", ThisWeekAddLuaNum)
|
|
|
+ //lua新建爬虫同比增减
|
|
|
+ AddLuaIncDecRatio := float64(0)
|
|
|
+ if ThisWeekAddLuaNum != 0 {
|
|
|
+ AddLuaIncDecRatio = float64(ThisWeekAddLuaNum-LastWeekAddLuaNum) / float64(ThisWeekAddLuaNum)
|
|
|
+ }
|
|
|
+ AddLuaIncDecRatio, _ = strconv.ParseFloat(fmt.Sprintf("%.4f", AddLuaIncDecRatio), 64)
|
|
|
+ qu.Debug("lua新建爬虫同比增减:", AddLuaIncDecRatio)
|
|
|
+ //上周已完成新建爬虫数量
|
|
|
+ qu.Debug("lua上周已完成新建爬虫数量:", LastWeekFinishLuaNum)
|
|
|
+ //本周已完成新建爬虫数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "comeintime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ }
|
|
|
+ ThisWeekFinishLuaNum := util.MgoE.Count("lua_logs_auditor_new", query)
|
|
|
+ qu.Debug("lua本周已完成新建爬虫数量:", ThisWeekFinishLuaNum)
|
|
|
+ //lua完成新建爬虫同比增减
|
|
|
+ FinishLuaIncDecRatio := float64(0)
|
|
|
+ if ThisWeekFinishLuaNum != 0 {
|
|
|
+ FinishLuaIncDecRatio = float64(ThisWeekFinishLuaNum-LastWeekFinishLuaNum) / float64(ThisWeekFinishLuaNum)
|
|
|
+ }
|
|
|
+ FinishLuaIncDecRatio, _ = strconv.ParseFloat(fmt.Sprintf("%.4f", FinishLuaIncDecRatio), 64)
|
|
|
+ qu.Debug("lua完成新建爬虫同比增减:", FinishLuaIncDecRatio)
|
|
|
+ //待完成新建爬虫总数
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "event": 7000,
|
|
|
+ "platform": "golua平台",
|
|
|
+ "state": map[string]interface{}{
|
|
|
+ "$lte": 2, //待完成、待审核、未通过
|
|
|
+ },
|
|
|
+ }
|
|
|
+ NeedToFinishNewLuaAllNum := CountNeedToFinishNewLuaAllNum(query)
|
|
|
+ qu.Debug("lua待完成新建爬虫总数:", NeedToFinishNewLuaAllNum)
|
|
|
+
|
|
|
+ //3、lua历史维护
|
|
|
+ //上周新增待维护任务数量
|
|
|
+ qu.Debug("上周新增待维护任务数量:", LastWeekAddEffectTaskNum)
|
|
|
+ //本周新增待维护任务数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "l_checktime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ "i_state": map[string]interface{}{ //处理中、待审核、审核通过、未通过
|
|
|
+ "$gte": 2,
|
|
|
+ "$lte": 5,
|
|
|
+ },
|
|
|
+ }
|
|
|
+ ThisWeekAddEffectTaskNum := util.MgoE.Count("task", query)
|
|
|
+ qu.Debug("本周新增待维护任务数量:", ThisWeekAddEffectTaskNum)
|
|
|
+ //新建待维护任务同比增减
|
|
|
+ AddEffectTaskIncDecRatio := float64(0)
|
|
|
+ if ThisWeekAddEffectTaskNum != 0 {
|
|
|
+ AddEffectTaskIncDecRatio = float64(ThisWeekAddEffectTaskNum-LastWeekAddEffectTaskNum) / float64(ThisWeekAddEffectTaskNum)
|
|
|
+ }
|
|
|
+ AddEffectTaskIncDecRatio, _ = strconv.ParseFloat(fmt.Sprintf("%.4f", AddEffectTaskIncDecRatio), 64)
|
|
|
+ qu.Debug("新建待维护任务同比增减:", AddEffectTaskIncDecRatio)
|
|
|
+ //上周完成待维护任务数量
|
|
|
+ qu.Debug("上周完成待维护任务数量:", LastWeekFinishEffectTaskNum)
|
|
|
+ //本周完成待维护任务数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "l_uploadtime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ "i_state": 4, //审核通过
|
|
|
+ }
|
|
|
+ ThisWeekFinishEffectTaskNum := util.MgoE.Count("task", query)
|
|
|
+ qu.Debug("本周完成待维护任务数量:", ThisWeekFinishEffectTaskNum)
|
|
|
+ //完成待维护任务同比增减
|
|
|
+ FinishEffectTaskIncDecRatio := float64(0)
|
|
|
+ if ThisWeekFinishEffectTaskNum != 0 {
|
|
|
+ FinishEffectTaskIncDecRatio = float64(ThisWeekFinishEffectTaskNum-LastWeekFinishEffectTaskNum) / float64(ThisWeekFinishEffectTaskNum)
|
|
|
+ }
|
|
|
+ FinishEffectTaskIncDecRatio, _ = strconv.ParseFloat(fmt.Sprintf("%.4f", FinishEffectTaskIncDecRatio), 64)
|
|
|
+ qu.Debug("完成待维护任务同比增减:", FinishEffectTaskIncDecRatio)
|
|
|
+ //待维护任务总数
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "i_state": map[string]interface{}{
|
|
|
+ "$in": []int{2, 3, 5}, //处理中、待审核、未通过
|
|
|
+ },
|
|
|
+ }
|
|
|
+ NeedToFinishEffectTaskAllNum := util.MgoE.Count("task", query)
|
|
|
+ qu.Debug("待维护任务总数:", NeedToFinishEffectTaskAllNum)
|
|
|
+
|
|
|
+ //4、python爬虫
|
|
|
+ //上周新建爬虫数量
|
|
|
+ qu.Debug("python上周新建爬虫数量:", LastWeekAddPythonNum)
|
|
|
+ //本周新建爬虫数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "comeintime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ "platform": "python",
|
|
|
+ }
|
|
|
+ ThisWeekAddPythonNum := util.MgoEB.Count("luaconfig", query)
|
|
|
+ qu.Debug("python本周新建爬虫数量:", ThisWeekAddPythonNum)
|
|
|
+ //新建爬虫数量同比增减
|
|
|
+ AddPythonIncDecRatio := float64(0)
|
|
|
+ if ThisWeekAddPythonNum != 0 {
|
|
|
+ AddPythonIncDecRatio = float64(ThisWeekAddPythonNum-LastWeekAddPythonNum) / float64(ThisWeekAddPythonNum)
|
|
|
+ }
|
|
|
+ AddPythonIncDecRatio, _ = strconv.ParseFloat(fmt.Sprintf("%.4f", AddPythonIncDecRatio), 64)
|
|
|
+ qu.Debug("python新建爬虫同比增减:", AddPythonIncDecRatio)
|
|
|
+ //上周已完成爬虫数量
|
|
|
+ qu.Debug("python上周已完成爬虫数量:", LastWeekFinishPythonNum)
|
|
|
+ //本周已完成爬虫数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "l_uploadtime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ "platform": "python",
|
|
|
+ "state": 11,
|
|
|
+ }
|
|
|
+ ThisWeekFinishPythonNum := util.MgoE.Count("python_logs_auditor", query)
|
|
|
+ qu.Debug("python本周已完成爬虫数量:", ThisWeekFinishPythonNum)
|
|
|
+ //已完成爬虫数量同比增减
|
|
|
+ FinishPythonIncDecRatio := float64(0)
|
|
|
+ if ThisWeekFinishPythonNum != 0 {
|
|
|
+ FinishPythonIncDecRatio = float64(ThisWeekFinishPythonNum-LastWeekFinishPythonNum) / float64(ThisWeekFinishPythonNum)
|
|
|
+ }
|
|
|
+ FinishPythonIncDecRatio, _ = strconv.ParseFloat(fmt.Sprintf("%.4f", FinishPythonIncDecRatio), 64)
|
|
|
+ qu.Debug("python已完成爬虫数量同比增减:", FinishPythonIncDecRatio)
|
|
|
+ //python待完成爬虫总数
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "$or": []interface{}{
|
|
|
+ map[string]interface{}{
|
|
|
+ "platform": "python",
|
|
|
+ "state": map[string]interface{}{
|
|
|
+ "$nin": []int{4, 5, 7, 10, 11}, //除已作废、已上架、无发布、已删除、已上线状态的爬虫都是待开发的爬虫
|
|
|
+ },
|
|
|
+ },
|
|
|
+ map[string]interface{}{
|
|
|
+ "platform": "golua平台",
|
|
|
+ "state": 9, //lua平台无法处理(转python)的爬虫算到python待完成的爬虫里
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ NeedToFinishPythonAllNum := util.MgoEB.Count("luaconfig", query)
|
|
|
+ qu.Debug("python待完成爬虫总数:", NeedToFinishPythonAllNum)
|
|
|
+
|
|
|
+ //保存本周记录
|
|
|
+ util.MgoS.Save("spider_weeklyreport", map[string]interface{}{
|
|
|
+ "addtasknum": ThisWeekAddTaskNum,
|
|
|
+ "addluanum": ThisWeekAddLuaNum,
|
|
|
+ "addeffecttasknum": ThisWeekAddEffectTaskNum,
|
|
|
+ "addpythonnum": ThisWeekAddPythonNum,
|
|
|
+ "checktasknum": ThisWeekCheckTaskNum,
|
|
|
+ "finishluanum": ThisWeekFinishLuaNum,
|
|
|
+ "finisheffecttasknum": ThisWeekFinishEffectTaskNum,
|
|
|
+ "finishpythonnum": ThisWeekFinishPythonNum,
|
|
|
+ "needchecktaskallnum": NeedToCheckTaskAllNum,
|
|
|
+ "needfinishluaallnum": NeedToFinishNewLuaAllNum,
|
|
|
+ "needfinishtaskallnum": NeedToFinishEffectTaskAllNum,
|
|
|
+ "needfinishpythonallnum": NeedToFinishPythonAllNum,
|
|
|
+ "comeintime": time.Now().Unix(),
|
|
|
+ })
|
|
|
+ //生成excel
|
|
|
+ file, err := xlsx.OpenFile("res/report.xlsx")
|
|
|
+ if err != nil {
|
|
|
+ qu.Debug("Open Report File Error:", err)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ sheet := file.Sheets[0]
|
|
|
+ taskRow := sheet.Rows[1]
|
|
|
+ taskRow.Cells[1].SetValue(LastWeekAddTaskAllNum)
|
|
|
+ taskRow.Cells[2].SetValue(ThisWeekAddTaskNum)
|
|
|
+ taskRow.Cells[3].SetValue(fmt.Sprint(AddTaskIncDecRatio*100) + "%")
|
|
|
+ taskRow.Cells[4].SetValue(LastWeekCheckTaskNum)
|
|
|
+ taskRow.Cells[5].SetValue(ThisWeekCheckTaskNum)
|
|
|
+ taskRow.Cells[6].SetValue(fmt.Sprint(CheckTaskIncDecRatio*100) + "%")
|
|
|
+ taskRow.Cells[7].SetValue(NeedToCheckTaskAllNum)
|
|
|
+ newLuaRow := sheet.Rows[2]
|
|
|
+ newLuaRow.Cells[1].SetValue(LastWeekAddLuaNum)
|
|
|
+ newLuaRow.Cells[2].SetValue(ThisWeekAddLuaNum)
|
|
|
+ newLuaRow.Cells[3].SetValue(fmt.Sprint(AddLuaIncDecRatio*100) + "%")
|
|
|
+ newLuaRow.Cells[4].SetValue(LastWeekFinishLuaNum)
|
|
|
+ newLuaRow.Cells[5].SetValue(ThisWeekFinishLuaNum)
|
|
|
+ newLuaRow.Cells[6].SetValue(fmt.Sprint(FinishLuaIncDecRatio*100) + "%")
|
|
|
+ newLuaRow.Cells[7].SetValue(NeedToFinishNewLuaAllNum)
|
|
|
+ historyLuaRow := sheet.Rows[3]
|
|
|
+ historyLuaRow.Cells[1].SetValue(LastWeekAddEffectTaskNum)
|
|
|
+ historyLuaRow.Cells[2].SetValue(ThisWeekAddEffectTaskNum)
|
|
|
+ historyLuaRow.Cells[3].SetValue(fmt.Sprint(AddEffectTaskIncDecRatio*100) + "%")
|
|
|
+ historyLuaRow.Cells[4].SetValue(LastWeekFinishEffectTaskNum)
|
|
|
+ historyLuaRow.Cells[5].SetValue(ThisWeekFinishEffectTaskNum)
|
|
|
+ historyLuaRow.Cells[6].SetValue(fmt.Sprint(FinishEffectTaskIncDecRatio*100) + "%")
|
|
|
+ historyLuaRow.Cells[7].SetValue(NeedToFinishEffectTaskAllNum)
|
|
|
+ pythonRow := sheet.Rows[4]
|
|
|
+ pythonRow.Cells[1].SetValue(LastWeekAddPythonNum)
|
|
|
+ pythonRow.Cells[2].SetValue(ThisWeekAddPythonNum)
|
|
|
+ pythonRow.Cells[3].SetValue(fmt.Sprint(AddPythonIncDecRatio*100) + "%")
|
|
|
+ pythonRow.Cells[4].SetValue(LastWeekFinishPythonNum)
|
|
|
+ pythonRow.Cells[5].SetValue(ThisWeekFinishPythonNum)
|
|
|
+ pythonRow.Cells[6].SetValue(fmt.Sprint(FinishPythonIncDecRatio*100) + "%")
|
|
|
+ pythonRow.Cells[7].SetValue(NeedToFinishPythonAllNum)
|
|
|
+ mw := &util.MyWrite{
|
|
|
+ Byte: &bytes.Buffer{},
|
|
|
+ }
|
|
|
+ file.Write(mw)
|
|
|
+ bt := mw.Byte.Bytes()
|
|
|
+ now := time.Now()
|
|
|
+ name := qu.FormatDate(&now, qu.Date_Short_Layout) + "爬虫统计周报.xlsx"
|
|
|
+ gm.GSendMail_Bq("jy@jianyu360.cn", "zhangjinkun@topnet.net.cn,maxiaoshan@topnet.net.cn", "", "", "爬虫统计周报", "", name, bt, util.GMail)
|
|
|
+}
|
|
|
+
|
|
|
+func GetLastWeekReport() (int, int, int, int, int, int, int, int) {
|
|
|
+ defer qu.Catch()
|
|
|
+ datas, _ := util.MgoS.Find("spider_weeklyreport", nil, map[string]interface{}{"_id": -1}, nil, true, 0, 1)
|
|
|
+ if len(*datas) == 1 {
|
|
|
+ data := (*datas)[0]
|
|
|
+ return qu.IntAll(data["addtasknum"]), qu.IntAll(data["addluanum"]), qu.IntAll(data["addeffecttasknum"]), qu.IntAll(data["addpythonnum"]), qu.IntAll(data["checktasknum"]), qu.IntAll(data["finishluanum"]), qu.IntAll(data["finisheffecttasknum"]), qu.IntAll(data["finishpythonnum"])
|
|
|
+ } else {
|
|
|
+ qu.Debug("历史周报信息查询失败")
|
|
|
+ }
|
|
|
+ return -1, -1, -1, -1, -1, -1, -1, -1
|
|
|
+}
|
|
|
+
|
|
|
+func CountNeedToFinishNewLuaAllNum(query map[string]interface{}) (result int) {
|
|
|
+ defer qu.Catch()
|
|
|
+ sess := util.MgoEB.GetMgoConn()
|
|
|
+ defer util.MgoEB.DestoryMongoConn(sess)
|
|
|
+ ch := make(chan bool, 2)
|
|
|
+ wg := &sync.WaitGroup{}
|
|
|
+ lock := &sync.Mutex{}
|
|
|
+ field := map[string]interface{}{
|
|
|
+ "code": 1,
|
|
|
+ }
|
|
|
+ it := sess.DB(util.MgoEB.DbName).C("luaconfig").Find(&query).Select(&field).Iter()
|
|
|
+ n := 0
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
|
|
|
+ ch <- true
|
|
|
+ wg.Add(1)
|
|
|
+ go func(tmp map[string]interface{}) {
|
|
|
+ defer func() {
|
|
|
+ <-ch
|
|
|
+ wg.Done()
|
|
|
+ }()
|
|
|
+ code := qu.ObjToString(tmp["code"])
|
|
|
+ count := util.MgoE.Count("lua_logs_auditor", map[string]interface{}{"code": code, "types": "审核"})
|
|
|
+ if count == 0 { //无审核记录表示新爬虫
|
|
|
+ lock.Lock()
|
|
|
+ result++
|
|
|
+ lock.Unlock()
|
|
|
+ }
|
|
|
+ }(tmp)
|
|
|
+ if n%100 == 0 {
|
|
|
+ qu.Debug("current:", n)
|
|
|
+ }
|
|
|
+ tmp = map[string]interface{}{}
|
|
|
+ }
|
|
|
+ wg.Wait()
|
|
|
+ return
|
|
|
+}
|