|
@@ -5,6 +5,7 @@ import (
|
|
|
"github.com/donnie4w/go-logger/logger"
|
|
|
"go.mongodb.org/mongo-driver/bson"
|
|
|
qu "qfw/util"
|
|
|
+ "strconv"
|
|
|
"sync"
|
|
|
"time"
|
|
|
"util"
|
|
@@ -79,6 +80,8 @@ type NewSpider struct {
|
|
|
Channel_Status int `bson:"channel_status"` //栏目响应状态
|
|
|
//补充信息
|
|
|
Comeintime int64 `bson:"comeintime"`
|
|
|
+ //列表页数据存储表
|
|
|
+ ListDataColl string `json:"listdatacoll"`
|
|
|
//异常汇总
|
|
|
//Error map[string]*ErrorInfo `json:"error"`
|
|
|
ErrType string `bson:"errtype"` //记录权重最高的异常类型
|
|
@@ -180,6 +183,7 @@ func getCodeBaseInfo() {
|
|
|
}
|
|
|
sp.Working = util.CodeEventWorking[sp.Working]
|
|
|
sp.Model = util.CodeEventModel[sp.Event]
|
|
|
+ sp.ListDataColl = util.CodeListDataColl[sp.Event]
|
|
|
sp.MaxPage = maxPage
|
|
|
if sp.Platform == "python" {
|
|
|
sp.Model = 1
|
|
@@ -709,6 +713,7 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
|
|
|
"i_times": 1,
|
|
|
"l_comeintime": 1,
|
|
|
}
|
|
|
+ count, failRate := sp.getCodeFailDataCount()
|
|
|
list, _ := util.MgoEB.Find("task", query, nil, fields, false, -1, -1)
|
|
|
update := []map[string]interface{}{}
|
|
|
if list != nil && len(*list) > 0 { //已有任务
|
|
@@ -729,6 +734,8 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
|
|
|
comeintime_old := qu.Int64All(task["l_comeintime"]) //历史任务创建时间
|
|
|
|
|
|
result := map[string]interface{}{
|
|
|
+ "i_count": count,
|
|
|
+ "f_failrate": failRate,
|
|
|
"i_event": sp.Event,
|
|
|
"l_updatetime": time.Now().Unix(),
|
|
|
"i_times": times_old + 1,
|
|
@@ -779,6 +786,8 @@ func createTask(sp *NewSpider, taskArr *[][]map[string]interface{}, lock *sync.M
|
|
|
"l_complete": util.CompleteTime("1"),
|
|
|
//"s_urgency": "1",
|
|
|
"s_platform": sp.Platform,
|
|
|
+ "i_count": count,
|
|
|
+ "f_failrate": failRate,
|
|
|
}
|
|
|
update = append(update, query)
|
|
|
update = append(update, saveMap)
|
|
@@ -1147,6 +1156,27 @@ func (sp *NewSpider) getErrHrefs(coll, errType string, query map[string]interfac
|
|
|
}
|
|
|
return
|
|
|
}
|
|
|
+func (sp *NewSpider) getCodeFailDataCount() (int, float64) {
|
|
|
+ //7日总下载量
|
|
|
+ query := map[string]interface{}{
|
|
|
+ "spidercode": sp.Code,
|
|
|
+ "comeintime": map[string]interface{}{
|
|
|
+ "$gte": util.GetTime(-7),
|
|
|
+ "$lte": util.GetTime(0),
|
|
|
+ },
|
|
|
+ }
|
|
|
+ allCount := util.MgoS.Count(sp.ListDataColl, query)
|
|
|
+ if allCount == 0 {
|
|
|
+ return allCount, float64(0)
|
|
|
+ }
|
|
|
+ //7日下载失败量
|
|
|
+ query["state"] = -1
|
|
|
+ failCount := util.MgoS.Count(sp.ListDataColl, query)
|
|
|
+
|
|
|
+ avg := float64(failCount) / float64(allCount)
|
|
|
+ value, _ := strconv.ParseFloat(fmt.Sprintf("%.2f", avg*100), 64)
|
|
|
+ return allCount, value
|
|
|
+}
|
|
|
|
|
|
//更新爬虫
|
|
|
func updateLuaconfig() {
|