|
@@ -6,6 +6,7 @@ import (
|
|
qu "qfw/util"
|
|
qu "qfw/util"
|
|
"strings"
|
|
"strings"
|
|
"sync"
|
|
"sync"
|
|
|
|
+ "sync/atomic"
|
|
"time"
|
|
"time"
|
|
"util"
|
|
"util"
|
|
|
|
|
|
@@ -173,6 +174,7 @@ func StartTask() {
|
|
InitInfo() //初始化时间
|
|
InitInfo() //初始化时间
|
|
logger.Debug(StartTime, EndTime, Publishtime)
|
|
logger.Debug(StartTime, EndTime, Publishtime)
|
|
GetCodeBaseInfo() //初始化爬虫基本信息
|
|
GetCodeBaseInfo() //初始化爬虫基本信息
|
|
|
|
+ GetBiddingCount() //统计bidding表爬虫采集量
|
|
GetCodeHeart() //初始化爬虫心跳信息
|
|
GetCodeHeart() //初始化爬虫心跳信息
|
|
GetSpiderHighListDownloadNum() //统计spider_highlistdata爬虫列表页下载量、下载失败量、未下载量
|
|
GetSpiderHighListDownloadNum() //统计spider_highlistdata爬虫列表页下载量、下载失败量、未下载量
|
|
GetSpiderListDownloadNum() //统计spider_listdata爬虫列表页下载量、下载失败量、未下载量
|
|
GetSpiderListDownloadNum() //统计spider_listdata爬虫列表页下载量、下载失败量、未下载量
|
|
@@ -185,6 +187,7 @@ func StartTask() {
|
|
// GetDownloadNumber() //统计下载量
|
|
// GetDownloadNumber() //统计下载量
|
|
//CloseTask() //关闭任务
|
|
//CloseTask() //关闭任务
|
|
SendInfoToWxWork_SiteDataCount()
|
|
SendInfoToWxWork_SiteDataCount()
|
|
|
|
+ SendLuaPythonAllNum()
|
|
}
|
|
}
|
|
|
|
|
|
//初始化
|
|
//初始化
|
|
@@ -248,6 +251,7 @@ func GetCodeBaseInfo() {
|
|
"l_uploadtime": 1,
|
|
"l_uploadtime": 1,
|
|
"listisfilter": 1,
|
|
"listisfilter": 1,
|
|
"frequencyerrtimes": 1,
|
|
"frequencyerrtimes": 1,
|
|
|
|
+ "code": 1,
|
|
}
|
|
}
|
|
count := util.MgoE.Count("luaconfig", query)
|
|
count := util.MgoE.Count("luaconfig", query)
|
|
logger.Debug("共加载线上爬虫个数:", count)
|
|
logger.Debug("共加载线上爬虫个数:", count)
|
|
@@ -265,13 +269,14 @@ func GetCodeBaseInfo() {
|
|
Error: map[string]*ErrorInfo{},
|
|
Error: map[string]*ErrorInfo{},
|
|
}
|
|
}
|
|
if param_common, ok := tmp["param_common"].([]interface{}); ok && len(param_common) >= 6 {
|
|
if param_common, ok := tmp["param_common"].([]interface{}); ok && len(param_common) >= 6 {
|
|
- sp.Code = qu.ObjToString(param_common[0])
|
|
|
|
|
|
+ //sp.Code = qu.ObjToString(param_common[0])
|
|
sp.Site = qu.ObjToString(param_common[1])
|
|
sp.Site = qu.ObjToString(param_common[1])
|
|
sp.Channel = qu.ObjToString(param_common[2])
|
|
sp.Channel = qu.ObjToString(param_common[2])
|
|
sp.MaxPage = qu.IntAll(param_common[5])
|
|
sp.MaxPage = qu.IntAll(param_common[5])
|
|
} else {
|
|
} else {
|
|
logger.Debug("加载爬虫出错:", tmp["_id"])
|
|
logger.Debug("加载爬虫出错:", tmp["_id"])
|
|
}
|
|
}
|
|
|
|
+ sp.Code = qu.ObjToString(tmp["code"])
|
|
sp.ModifyUser = qu.ObjToString(tmp["modifyuser"])
|
|
sp.ModifyUser = qu.ObjToString(tmp["modifyuser"])
|
|
sp.ModifyId = qu.ObjToString(tmp["modifyuserid"])
|
|
sp.ModifyId = qu.ObjToString(tmp["modifyuserid"])
|
|
sp.AuditTime = qu.Int64All(tmp["l_uploadtime"])
|
|
sp.AuditTime = qu.Int64All(tmp["l_uploadtime"])
|
|
@@ -302,6 +307,54 @@ func GetCodeBaseInfo() {
|
|
logger.Debug("爬虫基本信息准备完成...", len(CodeInfoMap))
|
|
logger.Debug("爬虫基本信息准备完成...", len(CodeInfoMap))
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+func GetBiddingCount() {
|
|
|
|
+ defer qu.Catch()
|
|
|
|
+ sess := util.MgoB.GetMgoConn()
|
|
|
|
+ defer util.MgoB.DestoryMongoConn(sess)
|
|
|
|
+ lock := &sync.Mutex{}
|
|
|
|
+ wg := &sync.WaitGroup{}
|
|
|
|
+ ch := make(chan bool, 5)
|
|
|
|
+ query := map[string]interface{}{
|
|
|
|
+ "comeintime": map[string]interface{}{
|
|
|
|
+ "$gte": StartTime,
|
|
|
|
+ "$lt": EndTime,
|
|
|
|
+ },
|
|
|
|
+ }
|
|
|
|
+ fieles := map[string]interface{}{
|
|
|
|
+ "spidercode": 1,
|
|
|
|
+ }
|
|
|
|
+ count := util.MgoB.Count("bidding", query)
|
|
|
|
+ logger.Debug("bidding采集数据量:", count)
|
|
|
|
+ it := sess.DB(util.MgoB.DbName).C("bidding").Find(&query).Select(&fieles).Iter()
|
|
|
|
+ n := 0
|
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
|
|
|
|
+ wg.Add(1)
|
|
|
|
+ ch <- true
|
|
|
|
+ go func(tmp map[string]interface{}) {
|
|
|
|
+ defer func() {
|
|
|
|
+ <-ch
|
|
|
|
+ wg.Done()
|
|
|
|
+ }()
|
|
|
|
+ code := qu.ObjToString(tmp["spidercode"])
|
|
|
|
+ lock.Lock()
|
|
|
|
+ if sp := CodeInfoMap[code]; sp != nil {
|
|
|
|
+ if sp.Platform == "golua平台" {
|
|
|
|
+ LuaBiddingDownloadAllNum++
|
|
|
|
+ } else if sp.Platform == "python" {
|
|
|
|
+ PythonBiddingDownloadAllNum++
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ lock.Unlock()
|
|
|
|
+ }(tmp)
|
|
|
|
+ if n%1000 == 0 {
|
|
|
|
+ logger.Debug(n)
|
|
|
|
+ }
|
|
|
|
+ tmp = map[string]interface{}{}
|
|
|
|
+ }
|
|
|
|
+ wg.Wait()
|
|
|
|
+ logger.Debug("Bidding数据量统计完成...", LuaBiddingDownloadAllNum, PythonBiddingDownloadAllNum)
|
|
|
|
+}
|
|
|
|
+
|
|
// GetCodeHeart 获取爬虫的心跳信息
|
|
// GetCodeHeart 获取爬虫的心跳信息
|
|
func GetCodeHeart() {
|
|
func GetCodeHeart() {
|
|
defer qu.Catch()
|
|
defer qu.Catch()
|
|
@@ -1632,7 +1685,18 @@ func CreateTaskProcess() {
|
|
}
|
|
}
|
|
//根据爬虫信息新建任务
|
|
//根据爬虫信息新建任务
|
|
CreateTask(task, spider, &upsertBulk, lock) //比对历史任务,新建任务
|
|
CreateTask(task, spider, &upsertBulk, lock) //比对历史任务,新建任务
|
|
- //
|
|
|
|
|
|
+ if spider.Platform == "golua平台" {
|
|
|
|
+ //列表页总下载量
|
|
|
|
+ atomic.AddInt64(&LuaListDownloadAllNum, int64(spider.RepeatDownloadAllNum))
|
|
|
|
+ //列表页总下载成功量
|
|
|
|
+ atomic.AddInt64(&LuaListDownloadSuccessAllNum, int64(spider.RepeatDownloadSuccessNum))
|
|
|
|
+ } else {
|
|
|
|
+ //列表页总下载量
|
|
|
|
+ atomic.AddInt64(&PythonListDownloadAllNum, int64(spider.RepeatDownloadAllNum))
|
|
|
|
+ //列表页总下载成功量
|
|
|
|
+ atomic.AddInt64(&PythonListDownloadSuccessAllNum, int64(spider.RepeatDownloadSuccessNum))
|
|
|
|
+ }
|
|
|
|
+
|
|
lock.Lock()
|
|
lock.Lock()
|
|
if len(arr) > 500 {
|
|
if len(arr) > 500 {
|
|
util.MgoE.SaveBulk("luacodeinfo", arr...)
|
|
util.MgoE.SaveBulk("luacodeinfo", arr...)
|
|
@@ -1940,6 +2004,17 @@ func SaveCodeInfo() {
|
|
logger.Debug("Json UnMarshal Error", code)
|
|
logger.Debug("Json UnMarshal Error", code)
|
|
return
|
|
return
|
|
}
|
|
}
|
|
|
|
+ if sp.Platform == "golua平台" {
|
|
|
|
+ //列表页总下载量
|
|
|
|
+ atomic.AddInt64(&LuaListDownloadAllNum, int64(sp.RepeatDownloadAllNum))
|
|
|
|
+ //列表页总下载成功量
|
|
|
|
+ atomic.AddInt64(&LuaListDownloadSuccessAllNum, int64(sp.RepeatDownloadSuccessNum))
|
|
|
|
+ } else {
|
|
|
|
+ //列表页总下载量
|
|
|
|
+ atomic.AddInt64(&PythonListDownloadAllNum, int64(sp.RepeatDownloadAllNum))
|
|
|
|
+ //列表页总下载成功量
|
|
|
|
+ atomic.AddInt64(&PythonListDownloadSuccessAllNum, int64(sp.RepeatDownloadSuccessNum))
|
|
|
|
+ }
|
|
lock.Lock()
|
|
lock.Lock()
|
|
if len(arr) > 500 {
|
|
if len(arr) > 500 {
|
|
util.MgoE.SaveBulk("luacodeinfo_back", arr...)
|
|
util.MgoE.SaveBulk("luacodeinfo_back", arr...)
|