apple пре 5 година
родитељ
комит
96d110eaf6
3 измењених фајлова са 168 додато и 71 уклоњено
  1. 2 2
      udpfilterdup/src/config.json
  2. 156 67
      udpfilterdup/src/datamap.go
  3. 10 2
      udpfilterdup/src/main.go

+ 2 - 2
udpfilterdup/src/config.json

@@ -5,8 +5,8 @@
         "addr": "192.168.3.207:27092",
         "pool": 5,
         "db": "extract_kf",
-        "extract": "zk_Copy_bidding_copy",
-        "extract_back": "zk_Copy_bidding_copy",
+        "extract": "zk_xiufu_test01",
+        "extract_back": "zk_xiufu_test01",
         "site": {
             "dbname": "extract_kf",
             "coll": "site"

+ 156 - 67
udpfilterdup/src/datamap.go

@@ -271,6 +271,10 @@ L:
 				}
 
 
+				// buyer has high priority: skip this candidate when both buyers are set and they differ
+				if info.buyer!=""&&v.buyer!=""&&info.buyer!=v.buyer {
+					continue
+				}
 
 				if info.site != "" {//站点临时赋值
 					sitelock.Lock()
@@ -285,7 +289,9 @@ L:
 						}
 					}
 				}
-				//前置条件1 - 站点相关
+
+
+				// Precondition - site related
 				if info.site != "" && info.site == v.site {
 					if info.href != "" && info.href == v.href {
 						reason = "同站点-href相同"
@@ -302,6 +308,8 @@ L:
 								source = v
 								reasons = reason
 								break L
+							}else {
+								continue
 							}
 						}else {
 							continue
@@ -310,7 +318,7 @@ L:
 				}
 
 				specialNum:= dealWithSpecialWordNumber(info,v)
-				//前置条件2 - 标题相关,有且一个关键词
+				// Precondition - title related, exactly one keyword
 				if specialNum==1 {
 					if info.title != v.title && v.title != "" && info.title != "" {
 						continue
@@ -346,6 +354,18 @@ L:
 					}
 				}
 
+
+				// Precondition - all five elements are equal
+				if leadingElementSame(v,info) {
+					reason = "五要素-相同-满足"
+					b = true
+					source = v
+					reasons = reason
+					break L
+				}
+
+
+
 				//新增快速数据过少判重
 				if LowHeavy {
 					repeat := false
@@ -495,8 +515,8 @@ func (d *datamap) update(t int64) {
 	if TimingTask {
 		d.keymap = d.GetLatelyFiveDay(t)
 	}else {
-		//d.keymap = d.GetLatelyFiveDay(t)//测试数据采用
-		d.keymap = d.GetLatelyFiveDayDouble(t)
+		d.keymap = d.GetLatelyFiveDay(t)//测试数据采用
+		//d.keymap = d.GetLatelyFiveDayDouble(t)
 	}
 	m := map[string]bool{}
 	for _, v := range d.keymap {
@@ -932,9 +952,9 @@ func tenderRepeat_C(v *Info, info *Info) bool {
 		return true
 	}
 	//原始地址...
-	if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
-		return true
-	}
+	//if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
+	//	return true
+	//}
 
 	if v.bidopentime != 0 && info.bidopentime != 0 && isBidopentimeInterval(info.bidopentime,v.bidopentime) {
 		return true
@@ -1041,9 +1061,9 @@ func winningRepeat_C(v *Info, info *Info) bool {
 		return true
 	}
 	//原始地址...
-	if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
-		return true
-	}
+	//if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
+	//	return true
+	//}
 	return false
 }
 
@@ -1099,41 +1119,56 @@ func contractRepeat_C(v *Info, info *Info) bool {
 
 // againHrefRepeat is the same-site re-check: it returns true as soon as any one of the field checks below fires, and false when none does.
 func againHrefRepeat(v *Info, info *Info) bool {
-	if v.buyer == info.buyer {
-		if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
-			info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
-			info.subtype == "变更" || info.subtype == "其他" {
-			//招标结果
-			if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
-				return true
-			}
-		} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
-			//中标结果
-			if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
-				(deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
-				return true
-			}
-		} else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
-			//合同
-			if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
-				return true
-			}
-			if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
-				(deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
-				return true
-			}
-			if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
-				return true
-			}
-			if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
-				return true
-			}
-		} else {
-			if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
-				return true
-			}
-		}
+	//if v.buyer == info.buyer {
+	//
+	//}
+	if isBidopentimeInterval(info.bidopentime,v.bidopentime) { // NOTE(review): no non-zero guard here, unlike the bidopentime check in tenderRepeat_C — confirm isBidopentimeInterval copes with a zero timestamp
+		return true
+	}
+	if v.budget != info.budget && v.budget != 0 && info.budget != 0 { // both budgets set and unequal
+		return true
 	}
+	if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{ // both bid amounts set and isBidWinningAmount reports true (presumably "amounts differ" — confirm)
+		return true
+	}
+	if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" { // both winners set and unequal after deleteExtraSpace
+		return true
+	}
+	if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber { // both contract numbers set and unequal
+		return true
+	}
+	if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode { // both project codes set and unequal
+		return true
+	}
+
+	//if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
+	//	info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
+	//	info.subtype == "变更" || info.subtype == "其他" {
+	//	//招标结果
+	//	if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
+	//		return true
+	//	}
+	//} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
+	//	//中标结果
+	//	if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
+	//		(deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
+	//		return true
+	//	}
+	//} else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
+	//	//合同
+	//	if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
+	//		return true
+	//	}
+	//	if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
+	//		(deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
+	//		return true
+	//	}
+	//
+	//} else {
+	//	if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
+	//		return true
+	//	}
+	//}
 
 	return false
 }
@@ -1141,32 +1176,53 @@ func againHrefRepeat(v *Info, info *Info) bool {
 
 
 
-//关键词再次金额判断
+// againRepeat performs the keyword re-check.
 func againRepeat(v *Info, info *Info) bool {
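 	// Originally scoped to records sharing the same buyer; that condition is now commented out (see the disabled lines below).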
 	//if info.buyer != "" && v.buyer == info.buyer {
-	if v.buyer == info.buyer {
-		if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
-			info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
-			info.subtype == "其他" || info.subtype == "变更" {
-			//预算金额满足条件
-			if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
-				return true
-			}
-		} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" ||
-			info.subtype == "流标" || info.subtype == "合同" || info.subtype == "验收" ||
-			info.subtype == "违规" {
-			//中标金额单位满足条件
-			if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
-				(deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
-				return true
-			}
-		} else {
-			//预算金额满足条件
-			if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
-				return true
-			}
-		}
+	//if v.buyer == info.buyer {
+	//
+	//}
+
+	//if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
+	//	info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
+	//	info.subtype == "其他" || info.subtype == "变更" {
+	//	//预算金额满足条件
+	//	if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
+	//		return true
+	//	}
+	//} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" ||
+	//	info.subtype == "流标" || info.subtype == "合同" || info.subtype == "验收" ||
+	//	info.subtype == "违规" {
+	//	//中标金额单位满足条件
+	//	if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
+	//		(deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
+	//		return true
+	//	}
+	//} else {
+	//	//预算金额满足条件
+	//	if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
+	//		return true
+	//	}
+	//}
+
+	if isBidopentimeInterval(info.bidopentime,v.bidopentime) {
+		return true
+	}
+	if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
+		return true
+	}
+	if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{
+		return true
+	}
+	if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" {
+		return true
+	}
+	if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
+		return true
+	}
+	if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
+		return true
 	}
 
 	return false
@@ -1235,6 +1291,39 @@ func isTheSameDay(i1 int64 ,i2 int64) bool {
 
 
 
+// leadingElementSame (precondition 0) reports whether v and info match on all five leading elements: projectname, buyer, contractnumber or projectcode (chosen by info.subtype), title, and agency; such records are treated as duplicates.
+func leadingElementSame(v *Info, info *Info) bool {
+
+	isok:= 0 // number of elements that matched so far
+	if info.projectname != "" && v.projectname == info.projectname {
+		isok++
+	}
+	if info.buyer != "" && v.buyer == info.buyer {
+		isok++
+	}
+	if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" { // for these three subtypes the contract number is compared instead of the project code
+		if info.contractnumber != "" && v.contractnumber == info.contractnumber {
+			isok++
+		}
+	}else {
+		if info.projectcode != "" && v.projectcode == info.projectcode {
+			isok++
+		}
+	}
+	if info.title != "" && v.title == info.title {
+		isok++
+	}
+	if v.agency == info.agency { // NOTE(review): the only check without a non-empty guard, so two empty agencies count as a match — confirm this is intended
+		isok++
+	}
+
+	if isok==5 { // every one of the five checks above matched
+		return true
+	}
+
+
+	return false
+}
 
 
 

+ 10 - 2
udpfilterdup/src/main.go

@@ -129,8 +129,8 @@ func mainT() {
 	} else {
 		//2019年8月1日-8月17日  712646
 		IdType = true
-		sid = "5d41607aa5cb26b9b734fe30"
-		eid = "5eb172e1f2c1a7850bad1c39"
+		sid = "5d55031fa5cb26b9b7f57570"
+		eid = "5e8c02b150b5ea296eed4509"
 		log.Println("正常判重测试开始")
 		log.Println(sid, "---", eid)
 		mapinfo := map[string]interface{}{}
@@ -210,6 +210,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
 	}
 	it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort(sortName).Iter()
 	updateExtract := [][]map[string]interface{}{}
+	ids:=""
 	log.Println("线程数:", threadNum)
 	pool := make(chan bool, threadNum)
 	wg := &sync.WaitGroup{}
@@ -352,6 +353,12 @@ func task(data []byte, mapInfo map[string]interface{}) {
 							repeat_idMap["_id"] = source.id
 						}
 						repeat_id = info.id
+
+						if ids=="" {
+							ids=source.id
+						}else {
+							ids=ids+","+source.id
+						}
 					}
 				}
 				//if repeateN%150==0&&repeateN>0 {
@@ -398,6 +405,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
 				"lteid": eid,
 				"stype": util.ObjToString(to["stype"]),
 				"key":   key,
+				"ids":   ids,
 			})
 			addr := &net.UDPAddr{
 				IP:   net.ParseIP(to["addr"].(string)),