|
@@ -0,0 +1,321 @@
|
|
|
+package luatask
|
|
|
+
|
|
|
+import (
|
|
|
+ "bytes"
|
|
|
+ "fmt"
|
|
|
+ "github.com/tealeg/xlsx"
|
|
|
+ qu "qfw/util"
|
|
|
+ gm "qfw/util/mail"
|
|
|
+ "strconv"
|
|
|
+ "sync"
|
|
|
+ "time"
|
|
|
+ "util"
|
|
|
+)
|
|
|
+
|
|
|
+func SpiderWeeklyReport() {
|
|
|
+ defer qu.Catch()
|
|
|
+ sTime := util.GetTime(-7)
|
|
|
+ eTime := util.GetTime(0)
|
|
|
+ ssTime := util.GetTime(-14)
|
|
|
+ qu.Debug(ssTime, sTime, eTime)
|
|
|
+ //1、任务相关
|
|
|
+ //本周新建任务数量
|
|
|
+ query := map[string]interface{}{
|
|
|
+ "l_comeintime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ }
|
|
|
+ thisWeekCreateTaskAllNum := util.MgoE.Count("task", query)
|
|
|
+ qu.Debug("本周新建任务数量:", thisWeekCreateTaskAllNum)
|
|
|
+ //上周未核实任务数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "l_comeintime": map[string]interface{}{
|
|
|
+ "$gte": ssTime,
|
|
|
+ "$lt": sTime,
|
|
|
+ },
|
|
|
+ "i_state": map[string]interface{}{
|
|
|
+ "$lte": 1, //任务状态:待确认、待处理
|
|
|
+ },
|
|
|
+ }
|
|
|
+ lastWeekNoCheckTaskAllNum := util.MgoE.Count("task", query)
|
|
|
+ qu.Debug("上周未核实任务数量:", lastWeekNoCheckTaskAllNum)
|
|
|
+ //本周核实任务数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "l_checktime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ "i_state": map[string]interface{}{
|
|
|
+ "$gte": 2, //任务状态:处理中、待审核、审核通过、未通过、关闭
|
|
|
+ },
|
|
|
+ }
|
|
|
+ thisWeekCheckTaskAllNum := util.MgoE.Count("task", query)
|
|
|
+ qu.Debug("本周核实任务数量:", thisWeekCheckTaskAllNum)
|
|
|
+ //完成进度(本周核实量/(存量待核实+本周核实量))
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "i_state": map[string]interface{}{
|
|
|
+ "$lte": 1, //任务状态:待确认、待处理
|
|
|
+ },
|
|
|
+ }
|
|
|
+ noCheckTaskAllNum := util.MgoE.Count("task", query)
|
|
|
+ qu.Debug("存量待核实任务数量:", noCheckTaskAllNum)
|
|
|
+ checkTaskCompleteSchedule := float64(thisWeekCheckTaskAllNum) / float64(noCheckTaskAllNum+thisWeekCheckTaskAllNum)
|
|
|
+ resultCheckTaskCompleteSchedule, _ := strconv.ParseFloat(fmt.Sprintf("%.4f", checkTaskCompleteSchedule), 64)
|
|
|
+ qu.Debug("任务审核完成进度:", resultCheckTaskCompleteSchedule)
|
|
|
+
|
|
|
+ //2、lua新增爬虫
|
|
|
+ //本周新建爬虫数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "comeintime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ "platform": "golua平台",
|
|
|
+ }
|
|
|
+ thisWeekCreateLuaAllNum := util.MgoEB.Count("luaconfig", query)
|
|
|
+ qu.Debug("本周lua新建爬虫数量:", thisWeekCreateLuaAllNum)
|
|
|
+ //上周新建爬虫未完成数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "comeintime": map[string]interface{}{
|
|
|
+ "$gte": ssTime,
|
|
|
+ "$lt": sTime,
|
|
|
+ },
|
|
|
+ "platform": "golua平台",
|
|
|
+ "event": 7000,
|
|
|
+ "state": map[string]interface{}{
|
|
|
+ "$lte": 2, //待完成、待审核、未通过
|
|
|
+ },
|
|
|
+ }
|
|
|
+ lastWeekNoFinishLuaAllNum := CountLastWeekNoFinishNewLuaAllNum(query)
|
|
|
+ qu.Debug("上周lua新建爬虫未完成数量:", lastWeekNoFinishLuaAllNum)
|
|
|
+ //本周已完成新建爬虫数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "comeintime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ }
|
|
|
+ thisWeekFinishLuaAllNum := util.MgoE.Count("lua_logs_auditor_new", query)
|
|
|
+ qu.Debug("本周lua已完成新建爬虫数量:", thisWeekFinishLuaAllNum)
|
|
|
+ //完成进度(本周完成量/(存量待完成+本周完成量))
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "event": 7000,
|
|
|
+ "platform": "golua平台",
|
|
|
+ "state": map[string]interface{}{
|
|
|
+ "$lte": 2, //待完成、待审核、未通过
|
|
|
+ },
|
|
|
+ }
|
|
|
+ noFinishLuaAllNum := CountNoFinishLuaAllNum(query)
|
|
|
+ qu.Debug("存量待完成新建爬虫数量:", noFinishLuaAllNum)
|
|
|
+ luaCompleteSchedule := float64(thisWeekFinishLuaAllNum) / float64(noFinishLuaAllNum+thisWeekFinishLuaAllNum)
|
|
|
+ resultLuaCompleteSchedule, _ := strconv.ParseFloat(fmt.Sprintf("%.4f", luaCompleteSchedule), 64)
|
|
|
+ qu.Debug("lua新建爬虫完成进度:", resultLuaCompleteSchedule)
|
|
|
+
|
|
|
+ //3、lua历史维护
|
|
|
+ //本周新增待维护任务数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "l_checktime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ "i_state": map[string]interface{}{ //处理中、待审核、审核通过、未通过
|
|
|
+ "$gte": 2,
|
|
|
+ "$lte": 5,
|
|
|
+ },
|
|
|
+ }
|
|
|
+ thisWeekHistoryLuaAllNum := util.MgoE.Count("task", query)
|
|
|
+ qu.Debug("本周新增待维护任务数量:", thisWeekHistoryLuaAllNum)
|
|
|
+ //上周分发未完成任务数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "l_checktime": map[string]interface{}{
|
|
|
+ "$gte": ssTime,
|
|
|
+ "$lt": sTime,
|
|
|
+ },
|
|
|
+ "i_state": map[string]interface{}{ //处理中、待审核、未通过
|
|
|
+ "$in": []int{2, 3, 5},
|
|
|
+ },
|
|
|
+ }
|
|
|
+ lastWeekHistoryNoFinishLuaAllNum := util.MgoE.Count("task", query)
|
|
|
+ qu.Debug("上周分发未完成任务数量:", lastWeekHistoryNoFinishLuaAllNum)
|
|
|
+ //本周完成待维护任务数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "l_uploadtime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ "i_state": 4, //审核通过
|
|
|
+ }
|
|
|
+ thisWeekHistoryFinishLuaAllNum := util.MgoE.Count("task", query)
|
|
|
+ qu.Debug("本周完成待维护任务数量:", thisWeekHistoryFinishLuaAllNum)
|
|
|
+ //完成进度(本周维护量/(存量待维护+本周维护量))
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "i_state": map[string]interface{}{
|
|
|
+ "$in": []int{2, 3, 5}, //处理中、待审核、未通过
|
|
|
+ },
|
|
|
+ }
|
|
|
+ historyNoFinishLuaAllNum := util.MgoE.Count("task", query)
|
|
|
+ luaHistoryCompleteSchedule := float64(thisWeekHistoryFinishLuaAllNum) / float64(historyNoFinishLuaAllNum+thisWeekHistoryFinishLuaAllNum)
|
|
|
+ resultLuaHistoryCompleteSchedule, _ := strconv.ParseFloat(fmt.Sprintf("%.4f", luaHistoryCompleteSchedule), 64)
|
|
|
+ qu.Debug("任务完成进度:", resultLuaHistoryCompleteSchedule)
|
|
|
+
|
|
|
+ //4、python爬虫
|
|
|
+ //本周新建爬虫数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "comeintime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ "platform": "python",
|
|
|
+ }
|
|
|
+ thisWeekCreatePythonAllNum := util.MgoEB.Count("luaconfig", query)
|
|
|
+ qu.Debug("本周python新建爬虫数量:", thisWeekCreatePythonAllNum)
|
|
|
+ //上周新建爬虫未完成数量
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "comeintime": map[string]interface{}{
|
|
|
+ "$gte": ssTime,
|
|
|
+ "$lt": sTime,
|
|
|
+ },
|
|
|
+ "platform": "python",
|
|
|
+ "state": map[string]interface{}{
|
|
|
+ "$ne": 11,
|
|
|
+ },
|
|
|
+ }
|
|
|
+ lastWeekNoFinishPythonAllNum := util.MgoEB.Count("luaconfig", query)
|
|
|
+ qu.Debug("上周python新建爬虫未完成数量:", lastWeekNoFinishPythonAllNum)
|
|
|
+ //lastWeekCreatePythonAllNum := util.MgoEB.Count("luaconfig", query)
|
|
|
+ //lastWeekNoFinishPythonAllNum := lastWeekCreatePythonAllNum - thisWeekFinishPythonAllNum
|
|
|
+ //本周已完成爬虫数量(无审核日志,暂时无法统计准确)
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "platform": "python",
|
|
|
+ "state": 11,
|
|
|
+ "modifytime": map[string]interface{}{
|
|
|
+ "$gte": sTime,
|
|
|
+ "$lt": eTime,
|
|
|
+ },
|
|
|
+ }
|
|
|
+ thisWeekFinishPythonAllNum := util.MgoEB.Count("luaconfig", query)
|
|
|
+ qu.Debug("本周python已完成爬虫数量:", thisWeekFinishPythonAllNum)
|
|
|
+ //完成进度(本周完成量/(存量待完成+本周完成量))
|
|
|
+ query = map[string]interface{}{
|
|
|
+ "platform": "python",
|
|
|
+ "state": map[string]interface{}{
|
|
|
+ "$ne": 11,
|
|
|
+ },
|
|
|
+ }
|
|
|
+ historyNoFinishPythonAllNum := util.MgoEB.Count("luaconfig", query)
|
|
|
+ pythonHistoryCompleteSchedule := float64(thisWeekFinishPythonAllNum) / float64(historyNoFinishPythonAllNum+thisWeekFinishPythonAllNum)
|
|
|
+ resultPythonHistoryCompleteSchedule, _ := strconv.ParseFloat(fmt.Sprintf("%.4f", pythonHistoryCompleteSchedule), 64)
|
|
|
+ qu.Debug("python新建爬虫完成进度:", resultPythonHistoryCompleteSchedule)
|
|
|
+
|
|
|
+ //生成excel
|
|
|
+ file, err := xlsx.OpenFile("res/report.xlsx")
|
|
|
+ if err != nil {
|
|
|
+ qu.Debug("Open Report File Error:", err)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ sheet := file.Sheets[0]
|
|
|
+ taskRow := sheet.Rows[1]
|
|
|
+ taskRow.Cells[1].SetValue(thisWeekCreateTaskAllNum)
|
|
|
+ taskRow.Cells[2].SetValue(lastWeekNoCheckTaskAllNum)
|
|
|
+ taskRow.Cells[3].SetValue(thisWeekCheckTaskAllNum)
|
|
|
+ taskRow.Cells[4].SetValue(fmt.Sprint(resultCheckTaskCompleteSchedule*100) + "%")
|
|
|
+ newLuaRow := sheet.Rows[2]
|
|
|
+ newLuaRow.Cells[1].SetValue(thisWeekCreateLuaAllNum)
|
|
|
+ newLuaRow.Cells[2].SetValue(lastWeekNoFinishLuaAllNum)
|
|
|
+ newLuaRow.Cells[3].SetValue(thisWeekFinishLuaAllNum)
|
|
|
+ newLuaRow.Cells[4].SetValue(fmt.Sprint(resultLuaCompleteSchedule*100) + "%")
|
|
|
+ historyLuaRow := sheet.Rows[3]
|
|
|
+ historyLuaRow.Cells[1].SetValue(thisWeekHistoryLuaAllNum)
|
|
|
+ historyLuaRow.Cells[2].SetValue(lastWeekHistoryNoFinishLuaAllNum)
|
|
|
+ historyLuaRow.Cells[3].SetValue(thisWeekHistoryFinishLuaAllNum)
|
|
|
+ historyLuaRow.Cells[4].SetValue(fmt.Sprint(resultLuaHistoryCompleteSchedule*100) + "%")
|
|
|
+ pythonRow := sheet.Rows[4]
|
|
|
+ pythonRow.Cells[1].SetValue(thisWeekCreatePythonAllNum)
|
|
|
+ pythonRow.Cells[2].SetValue(lastWeekNoFinishPythonAllNum)
|
|
|
+ pythonRow.Cells[3].SetValue(thisWeekFinishPythonAllNum)
|
|
|
+ pythonRow.Cells[4].SetValue(fmt.Sprint(resultPythonHistoryCompleteSchedule*100) + "%")
|
|
|
+ mw := &util.MyWrite{
|
|
|
+ Byte: &bytes.Buffer{},
|
|
|
+ }
|
|
|
+ file.Write(mw)
|
|
|
+ bt := mw.Byte.Bytes()
|
|
|
+ now := time.Now()
|
|
|
+ name := qu.FormatDate(&now, qu.Date_Short_Layout) + "爬虫统计周报.xlsx"
|
|
|
+ gm.GSendMail_Bq("jy@jianyu360.cn", "maxiaoshan@topnet.net.cn", "", "", "爬虫统计周报", "", name, bt, util.GMail)
|
|
|
+}
|
|
|
+
|
|
|
+//上周新建爬虫未完成数量
|
|
|
+func CountLastWeekNoFinishNewLuaAllNum(query map[string]interface{}) (result int) {
|
|
|
+ defer qu.Catch()
|
|
|
+ sess := util.MgoEB.GetMgoConn()
|
|
|
+ defer util.MgoEB.DestoryMongoConn(sess)
|
|
|
+ ch := make(chan bool, 2)
|
|
|
+ wg := &sync.WaitGroup{}
|
|
|
+ lock := &sync.Mutex{}
|
|
|
+ field := map[string]interface{}{
|
|
|
+ "code": 1,
|
|
|
+ }
|
|
|
+ it := sess.DB(util.MgoEB.DbName).C("luaconfig").Find(&query).Select(&field).Iter()
|
|
|
+ n := 0
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
|
|
|
+ ch <- true
|
|
|
+ wg.Add(1)
|
|
|
+ go func(tmp map[string]interface{}) {
|
|
|
+ defer func() {
|
|
|
+ <-ch
|
|
|
+ wg.Done()
|
|
|
+ }()
|
|
|
+ code := qu.ObjToString(tmp["code"])
|
|
|
+ count := util.MgoE.Count("lua_logs_auditor_new", map[string]interface{}{"code": code})
|
|
|
+ if count == 0 { //新爬虫审核记录表中有记录表示已经审核上架过,无论现在爬虫什么状态,视为历史爬虫
|
|
|
+ lock.Lock()
|
|
|
+ result++
|
|
|
+ lock.Unlock()
|
|
|
+ }
|
|
|
+ }(tmp)
|
|
|
+ if n%10 == 0 {
|
|
|
+ qu.Debug("current:", n)
|
|
|
+ }
|
|
|
+ tmp = map[string]interface{}{}
|
|
|
+ }
|
|
|
+ wg.Wait()
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+func CountNoFinishLuaAllNum(query map[string]interface{}) (result int) {
|
|
|
+ defer qu.Catch()
|
|
|
+ sess := util.MgoEB.GetMgoConn()
|
|
|
+ defer util.MgoEB.DestoryMongoConn(sess)
|
|
|
+ ch := make(chan bool, 2)
|
|
|
+ wg := &sync.WaitGroup{}
|
|
|
+ lock := &sync.Mutex{}
|
|
|
+ field := map[string]interface{}{
|
|
|
+ "code": 1,
|
|
|
+ }
|
|
|
+ it := sess.DB(util.MgoEB.DbName).C("luaconfig").Find(&query).Select(&field).Iter()
|
|
|
+ n := 0
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
|
|
|
+ ch <- true
|
|
|
+ wg.Add(1)
|
|
|
+ go func(tmp map[string]interface{}) {
|
|
|
+ defer func() {
|
|
|
+ <-ch
|
|
|
+ wg.Done()
|
|
|
+ }()
|
|
|
+ code := qu.ObjToString(tmp["code"])
|
|
|
+ count := util.MgoE.Count("lua_logs_auditor", map[string]interface{}{"code": code, "types": "审核"})
|
|
|
+ if count == 0 { //无审核记录表示新爬虫
|
|
|
+ lock.Lock()
|
|
|
+ result++
|
|
|
+ lock.Unlock()
|
|
|
+ }
|
|
|
+ }(tmp)
|
|
|
+ if n%100 == 0 {
|
|
|
+ qu.Debug("current:", n)
|
|
|
+ }
|
|
|
+ tmp = map[string]interface{}{}
|
|
|
+ }
|
|
|
+ wg.Wait()
|
|
|
+ return
|
|
|
+}
|