maxiaoshan 3 years ago
parent
commit
d9ab0e706b
2 changed files with 13 additions and 4 deletions
  1. 3 2
      src/spider/msgservice.go
  2. 10 2
      src/spider/spider.go

+ 3 - 2
src/spider/msgservice.go

@@ -277,6 +277,7 @@ func SaveObj(event int, checkAtrr string, data map[string]interface{}, saveredis
 		href := fmt.Sprint(data["href"])
 		if len(href) > 5 && saveredis { //有效数据
 			db := HexToBigIntMod(href) //根据href的哈希值选择Redis的db
+			hashHref := HexText(href)
 			//增量
 			isExist, _ := util.ExistRedis("title_repeat_judgement", 0, "url_repeat_"+href)
 			id := ""
@@ -291,9 +292,9 @@ func SaveObj(event int, checkAtrr string, data map[string]interface{}, saveredis
 				util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, href, 3600*24*30)
 				if !flag { //保存服务发送成功
 					//全量(判断是否已存在防止覆盖id)
-					isExist, _ := util.ExistRedis("title_repeat_fulljudgement", db, "url_repeat_"+href)
+					isExist, _ := util.ExistRedis("title_repeat_fulljudgement", db, hashHref)
 					if !isExist {
-						util.PutRedis("title_repeat_fulljudgement", db, "url_repeat_"+href, "lua_"+id, -1)
+						util.PutRedis("title_repeat_fulljudgement", db, hashHref, "lua_"+id, -1)
 					}
 				}
 			}

+ 10 - 2
src/spider/spider.go

@@ -142,9 +142,10 @@ func (s *Spider) DownloadDetailItem(paramdata, tmp map[string]interface{}) {
 		util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, href, 3600*24*30)
 		//全量
 		db := HexToBigIntMod(href)
-		isExist, _ := util.ExistRedis("title_repeat_fulljudgement", db, "url_repeat_"+href)
+		hashHref := HexText(href)
+		isExist, _ := util.ExistRedis("title_repeat_fulljudgement", db, hashHref)
 		if !isExist {
-			util.PutRedis("title_repeat_fulljudgement", db, "url_repeat_"+href, "", -1)
+			util.PutRedis("title_repeat_fulljudgement", db, hashHref, "", -1)
 		}
 	}
 	//jsondata处理
@@ -280,3 +281,10 @@ func HexToBigIntMod(href string) int {
 	n, _ = n.SetString(hex[2:], 16)
 	return int(n.Mod(n, big.NewInt(16)).Int64())
 }
+
+// HexText returns the SHA-256 digest of href as a lowercase hexadecimal string.
+func HexText(href string) string {
+	h := sha256.New()
+	h.Write([]byte(href))
+	return fmt.Sprintf("%x", h.Sum(nil))
+}