Просмотр исходного кода

新增列表页第一页执行心跳统计

maxiaoshan 1 год назад
Родитель
Коммит
5b3d3159b0
3 изменённых файла: 14 добавлений и 8 удалений
  1. 1 0
      src/spider/handler.go
  2. 3 2
      src/spider/script.go
  3. 10 6
      src/spider/spider.go

+ 1 - 0
src/spider/handler.go

@@ -1398,6 +1398,7 @@ func SaveHeartInfo() {
 				update = append(update, map[string]interface{}{"$set": map[string]interface{}{
 					"site":          heart.Site,
 					"channel":       heart.Channel,
+					"firstpage":     heart.FirstPageHeart,
 					"list":          heart.ListHeart,
 					"findlist":      heart.FindListHeart,
 					"detail":        heart.DetailHeart,

+ 3 - 2
src/spider/script.go

@@ -557,7 +557,7 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 			}
 			//爬虫心跳
 			if !s.RecordedHeartInfo {
-				UpdateHeart(*site, *channel, code, *user, "findlist") //记录列表页实际采集数据量心跳
+				UpdateHeart(*site, *channel, code, *user, "findlist", false) //记录列表页实际采集数据量心跳
 				s.RecordedHeartInfo = true
 			}
 		}
@@ -1159,6 +1159,7 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 		head := S.ToTable(-2)
 		stype := S.ToString(-3)
 		path := S.ToString(-4)
+		proxy := S.ToBool(-5)
 		headMap := util.GetTable(head)
 		//qu.Debug("cookie----------", cookie)
 		//qu.Debug("headMap----------", headMap)
@@ -1167,7 +1168,7 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 		if err == nil {
 			headJsonStr = string(headByte)
 		}
-		code, respHead, respCookie := codegrpc.GetCodeByPath(path, stype, headJsonStr, cookie)
+		code, respHead, respCookie := codegrpc.GetCodeByPath(path, stype, headJsonStr, cookie, proxy)
 		//qu.Debug("code====", code)
 		//qu.Debug("respHead====", respHead)
 		//qu.Debug("respCookie====", respCookie)

+ 10 - 6
src/spider/spider.go

@@ -31,6 +31,7 @@ type Heart struct {
 	DetailExecuteHeart int64  //三级页采集到数据心跳
 	FindListHeart      int64  //findListHtml执行心跳
 	ListHeart          int64  //爬虫列表页执行心跳
+	FirstPageHeart     int64  //采集第一页的心跳
 	ModifyUser         string //爬虫维护人
 	Site               string //站点
 	Channel            string //栏目
@@ -118,13 +119,16 @@ type DelaySite struct {
 }
 
 //心跳
-func UpdateHeart(site, channel, code, user, t string) {
+func UpdateHeart(site, channel, code, user, t string, firstpage bool) {
 	//sp, spiderOk := LoopListPath.Load(code)
 	//if spiderOk && sp != nil {
 	if htmp, ok := SpiderHeart.Load(code); ok {
 		if heart, ok := htmp.(*Heart); ok {
 			if t == "list" {
 				heart.ListHeart = time.Now().Unix()
+				if firstpage {
+					heart.FirstPageHeart = time.Now().Unix()
+				}
 			} else if t == "findlist" {
 				heart.FindListHeart = time.Now().Unix()
 			} else if t == "detail" {
@@ -299,7 +303,7 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 	}
 	for ; start <= max && !s.Stop; start++ {
 		if !s.Stop { //在下载详情页时爬虫下架,此时不再存心跳信息
-			UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "list") //记录所有节点列表页心跳
+			UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "list", start == 1) //记录所有节点列表页心跳
 		}
 		//logger.Info("爬虫:", s.Code, "重复页:", repeatPageNum, "	配置最大页:", tmpMax, "	最终最大页:", max, "	当前页:", start, "重复次数:", repeatPageTimes)
 		//if start > tmpMax && isRunRepeatList && repeatPageTimes >= 5 { //重复次数超过5次,不再翻页
@@ -655,7 +659,7 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
 		return
 	} else {
 		if !s.Stop {
-			UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail") //记录modal=0老模式采集三级页心跳
+			UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail", false) //记录modal=0老模式采集三级页心跳
 		}
 		isEsRepeat := false
 		if delaySite := DelaySiteMap[s.Name]; delaySite != nil && delaySite.Compete {
@@ -706,7 +710,7 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
 	}
 	//详情页下载数据成功心跳
 	if !s.Stop {
-		UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detailexcute") //记录modal=0老模式采集到数据心跳
+		UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detailexcute", false) //记录modal=0老模式采集到数据心跳
 	}
 	set := map[string]interface{}{"state": 1, "updatetime": time.Now().Unix()}
 	//详情页过滤数据
@@ -902,7 +906,7 @@ func (s *Spider) DownloadDetail(reload bool, isHistory bool) {
 		"event":      0,
 	}
 	if !isHistory && !s.Stop { //在下载详情页时爬虫下架,此时不再存心跳信息
-		UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail") //记录modal=1采集三级页心跳
+		UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail", false) //记录modal=1采集三级页心跳
 	}
 	countNum := MgoS.Count(coll, q) //统计util.Config.DayNum天内未下载爬虫个数
 	if isHistory && countNum == 0 { //下载历史数据量为0,手动stop
@@ -995,7 +999,7 @@ func (s *Spider) DownloadDetail(reload bool, isHistory bool) {
 					//下载、解析、入库
 					data, err = sp.DownloadDetailPage(tmp, data)
 					if !isHistory && !sp.Stop && sp.IsMainThread { //在下载详情页时爬虫下架,此时不再存心跳信息
-						UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detailexcute") //记录modal=1下载数据心跳
+						UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detailexcute", false) //记录modal=1下载数据心跳
 					}
 					if err != nil || data == nil {
 						success = false