Jelajahi Sumber

消息队列大小调整;心跳、翻页统计调整

maxiaoshan 1 tahun lalu
induk
komit
2e2d775462
3 berkas diubah dengan 23 penambahan dan 25 penghapusan
  1. 2 2
      src/spider/msgservice.go
  2. 18 23
      src/spider/script.go
  3. 3 0
      src/spider/store.go

+ 2 - 2
src/spider/msgservice.go

@@ -40,8 +40,8 @@ func InitMsgClient(serveraddr, name string) {
 			log.Println("重连成功")
 		},
 		CanHandleEvents: []int{mu.SERVICE_DOWNLOAD_APPEND_NODE, mu.SERVICE_DOWNLOAD_DELETE_NODE, util.Config.Uploadevent},
-		ReadBufferSize:  500,
-		WriteBufferSize: 500,
+		ReadBufferSize:  2000,
+		WriteBufferSize: 2000,
 	})
 	go gc4Alldownloader()
 }

+ 18 - 23
src/spider/script.go

@@ -55,12 +55,10 @@ type Script struct {
 	LastThreeTimes     []time.Duration //单条信息流程完成的时间,最后三次
 	FileLastThreeTimes []time.Duration //附件下载单条信息流程完成的时间,最后三次
 	//
-	AlreadyGetPageHeart       map[int]bool //记录某页已完成心跳记录
-	MaxPage                   int          //采集最大页
-	PageOneTextHash           string       //爬虫第一页页面内容hash
-	PageTwoTextHash           string       //爬虫第二页页面内容hash
-	RecordedHeartInfo         bool         //每轮次列表页是否已采集到数据标记(避免记录频率过快)
-	ContinueDownListChildTask bool         //是否是爬虫的子任务
+	MaxPage                   int    //采集最大页
+	PageOneTextHash           string //爬虫第一页页面内容hash
+	PageTwoTextHash           string //爬虫第二页页面内容hash
+	ContinueDownListChildTask bool   //是否是爬虫的子任务
 }
 
 const (
@@ -547,27 +545,24 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 		table := S.ToTable(-1)
 		if table.Len() > 0 {
 			//爬虫翻页
-			if s.MaxPage > 1 {
-				if pageno == 1 && s.PageOneTextHash == "" { //记录第一页数据的hash
-					textMap := util.TableToMap(table)
-					textByte, err := json.Marshal(textMap)
-					text := string(textByte)
-					if err == nil && text != "" {
-						s.PageOneTextHash = util.HexText(text)
-					}
-				} else if pageno == 2 && s.PageTwoTextHash == "" {
-					textMap := util.TableToMap(table)
-					textByte, err := json.Marshal(textMap)
-					text := string(textByte)
-					if err == nil && text != "" {
-						s.PageTwoTextHash = util.HexText(text)
-					}
+			if pageno == 1 && s.PageOneTextHash == "" { //记录第一页数据的hash
+				textMap := util.TableToMap(table)
+				textByte, err := json.Marshal(textMap)
+				text := string(textByte)
+				if err == nil && text != "" {
+					s.PageOneTextHash = util.HexText(text)
+				}
+			} else if pageno == 2 && s.PageTwoTextHash == "" {
+				textMap := util.TableToMap(table)
+				textByte, err := json.Marshal(textMap)
+				text := string(textByte)
+				if err == nil && text != "" {
+					s.PageTwoTextHash = util.HexText(text)
 				}
 			}
 			//爬虫心跳
-			if !s.ContinueDownListChildTask && !s.RecordedHeartInfo {
+			if !s.ContinueDownListChildTask {
 				UpdateHeart(*site, *channel, code, *user, "findlist", false) //记录列表页实际采集数据量心跳
-				s.RecordedHeartInfo = true
 			}
 		}
 		//}

+ 3 - 0
src/spider/store.go

@@ -272,6 +272,9 @@ func UpdateHeart(site, channel, code, user, t string, firstpage bool) {
 		}
 		if t == "list" {
 			heart.ListHeart = time.Now().Unix()
+			if firstpage {
+				heart.FirstPageHeart = time.Now().Unix()
+			}
 		} else if t == "findlist" {
 			heart.FindListHeart = time.Now().Unix()
 		} else if t == "detail" {