Selaa lähdekoodia

'spider_heart、spider_downloadrate增加site和channel'

maxiaoshan 3 vuotta sitten
vanhempi
commit
75daaf5d12
2 muutettua tiedostoa jossa 26 lisäystä ja 21 poistoa
  1. 3 0
      src/spider/handler.go
  2. 23 21
      src/spider/spider.go

+ 3 - 0
src/spider/handler.go

@@ -958,6 +958,7 @@ func NewSpider(code, luafile string) (*Spider, string) {
 	spider.Code = spider.GetVar("spiderCode")
 	spider.SCode = spider.Code
 	spider.Name = spider.GetVar("spiderName")
+	spider.Channel = spider.GetVar("spiderChannel")
 
 	//spider.LastExecTime = GetLastExectime(spider.Code)
 	spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
@@ -1236,6 +1237,8 @@ func SaveHeartInfo() {
 			update := []map[string]interface{}{}
 			update = append(update, map[string]interface{}{"code": code})
 			update = append(update, map[string]interface{}{"$set": map[string]interface{}{
+				"site":          heart.Site,
+				"channel":       heart.Channel,
 				"list":          heart.ListHeart,
 				"detail":        heart.DetailHeart,
 				"detailexecute": heart.DetailExecuteHeart,

+ 23 - 21
src/spider/spider.go

@@ -34,6 +34,8 @@ type Heart struct {
 	DetailExecuteHeart int64  //三级页采集到数据心跳
 	ListHeart          int64  //爬虫列表页执行心跳
 	ModifyUser         string //爬虫维护人
+	Site               string //站点
+	Channel            string //栏目
 }
 
 //爬虫()
@@ -41,6 +43,7 @@ type Spider struct {
 	Script
 	Code                            string //代码
 	Name                            string //名称
+	Channel                         string //栏目
 	DownDetail                      bool   //是否下载详细页
 	Stop                            bool   //停止标志
 	Pass                            bool   //暂停标志
@@ -81,7 +84,7 @@ var TimeChan = make(chan bool, 1)
 var Reg = regexp.MustCompile(`(http|https)://([\w]+\.)+[\w]+(/?)`)
 
 //心跳
-func UpdateHeart(code, user, t string) {
+func UpdateHeart(site, channel, code, user, t string) {
 	if htmp, ok := SpiderHeart.Load(code); ok {
 		if heart, ok := htmp.(*Heart); ok {
 			if t == "list" {
@@ -93,22 +96,19 @@ func UpdateHeart(code, user, t string) {
 			}
 		}
 	} else {
+		heart := &Heart{
+			ModifyUser: user,
+			Site:       site,
+			Channel:    channel,
+		}
 		if t == "list" {
-			SpiderHeart.Store(code, &Heart{
-				ListHeart:  time.Now().Unix(),
-				ModifyUser: user,
-			})
+			heart.ListHeart = time.Now().Unix()
 		} else if t == "detail" {
-			SpiderHeart.Store(code, &Heart{
-				DetailHeart: time.Now().Unix(),
-				ModifyUser:  user,
-			})
+			heart.DetailHeart = time.Now().Unix()
 		} else if t == "detailexcute" {
-			SpiderHeart.Store(code, &Heart{
-				DetailExecuteHeart: time.Now().Unix(),
-				ModifyUser:         user,
-			})
+			heart.DetailExecuteHeart = time.Now().Unix()
 		}
+		SpiderHeart.Store(code, heart)
 	}
 }
 
@@ -158,8 +158,8 @@ func (s *Spider) ExecJob(reload bool) {
 	if err != nil {
 		logger.Error(s.Code, err)
 	}
-	UpdateHeart(s.Code, s.MUserName, "list") //记录所有节点列表页心跳
-	err = s.DownListPageItem()               //下载列表
+	UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "list") //记录所有节点列表页心跳
+	err = s.DownListPageItem()                                  //下载列表
 	if err != nil {
 		logger.Error(s.Code, err)
 	}
@@ -364,6 +364,8 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 	nowTime := time.Now()
 	sDate := qu.FormatDate(&nowTime, qu.Date_Short_Layout)
 	set := map[string]interface{}{
+		"site":       s.Name,
+		"channel":    s.Channel,
 		"spidercode": s.Code,
 		"updatetime": nowTime.Unix(),
 		"event":      util.Config.Uploadevent,
@@ -507,7 +509,7 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
 		SaveHighListPageData(paramdata, href, num)
 		return
 	} else {
-		UpdateHeart(s.Code, s.MUserName, "detail") //记录modal=0老模式采集三级页心跳
+		UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail") //记录modal=0老模式采集三级页心跳
 		isExist, _ := util.ExistRedis("title_repeat_judgement", 0, "url_repeat_"+href)
 		if isExist { //更新redis生命周期
 			util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, href, 3600*24*30)
@@ -542,7 +544,7 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
 	if t1 > time.Now().Unix() { //防止发布时间超前
 		data["publishtime"] = time.Now().Unix()
 	}
-	UpdateHeart(s.Code, s.MUserName, "detailexcute") //记录modal=0老模式采集到数据心跳
+	UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detailexcute") //记录modal=0老模式采集到数据心跳
 	delete(data, "exit")
 	delete(data, "checkpublishtime")
 	data["comeintime"] = time.Now().Unix()
@@ -697,7 +699,7 @@ func (s *Spider) DownloadHighDetail() {
 				"comeintime": 0,
 				"event":      0,
 			}
-			UpdateHeart(s.Code, s.MUserName, "detail") //记录modal=1采集三级页心跳
+			UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail") //记录modal=1采集三级页心跳
 			list, _ := Mgo.Find("spider_highlistdata", q, o, f, false, 0, 100)
 			if list != nil && len(*list) > 0 {
 				for _, tmp := range *list {
@@ -725,7 +727,7 @@ func (s *Spider) DownloadHighDetail() {
 					}
 					//下载、解析、入库
 					data, err = s.DownloadDetailPage(tmp, data)
-					UpdateHeart(s.Code, s.MUserName, "detailexcute") //记录modal=1下载数据心跳
+					UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detailexcute") //记录modal=1下载数据心跳
 					if err != nil || data == nil {
 						success = false
 						times++
@@ -819,7 +821,7 @@ func (s *Spider) DownloadListDetail() {
 		"comeintime": 0,
 		"event":      0,
 	}
-	UpdateHeart(s.Code, s.MUserName, "detail") //记录modal=1采集三级页心跳
+	UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail") //记录modal=1采集三级页心跳
 	list, _ := Mgo.Find("spider_highlistdata", q, o, f, false, 0, 100)
 	if list != nil && len(*list) > 0 {
 		for _, tmp := range *list {
@@ -847,7 +849,7 @@ func (s *Spider) DownloadListDetail() {
 			}
 			//下载、解析、入库
 			data, err = s.DownloadDetailPage(tmp, data)
-			UpdateHeart(s.Code, s.MUserName, "detailexcute") //记录modal=1下载数据心跳
+			UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detailexcute") //记录modal=1下载数据心跳
 			if err != nil || data == nil {
 				success = false
 				times++