apple 5 years ago
parent
commit
1b8ade1704
3 changed files with 44 additions and 80 deletions
  1. 2 0
      udpfilterdup/src/README.md
  2. 7 2
      udpfilterdup/src/dataMethod.go
  3. 35 78
      udpfilterdup/src/dataMethodHeavy.go

+ 2 - 0
udpfilterdup/src/README.md

@@ -1,4 +1,6 @@
 基于内存的信息重复过滤
+"extract": "result_file_20200410",
+"extract_back": "result_file_20200409",
 
 {
     "udpport": ":11485",

+ 7 - 2
udpfilterdup/src/dataMethod.go

@@ -113,7 +113,7 @@ func isBidopentimeInterval(i1 int64 ,i2 int64) bool {
 	day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
 	if day1==day2 {
 		//是否间隔超过六小时
-		if math.Abs(float64(i1-i2)) >21600.0 {
+		if math.Abs(float64(i1-i2)) >43200.0 {
 			return true
 		}else {
 			return false
@@ -165,11 +165,16 @@ func leadingElementSame(v *Info, info *Info) bool {
 		isok++
 	}
 
-	if isok==5 {
+	if v.winner == info.winner&&info.winner != "" {
+		isok++
+	}
+
+	if isok>=5 {
 		return true
 	}
 
 
+
 	return false
 }
 

+ 35 - 78
udpfilterdup/src/dataMethodHeavy.go

@@ -176,8 +176,13 @@ func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
 		p11 = true
 	}
 
-	if  (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) || (p1 && p2 && p10) ||
-		(p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) ||
+	if info.subtype !=""&&(p1 && p3 && p11)  {
+		reason = reason + "满足招标A,3要素组合-" + ss + ","
+		return true, reason
+	}
+
+	if  (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) ||
+		(p1 && p2 && p10) || (p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) ||
 		(p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) ||
 		(p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) ||
 		(p2 && p4 && p9) || (p2 && p4 && p10) || (p2 && p4 && p11) ||
@@ -285,6 +290,7 @@ func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
 	}
 
 	if 	(p1 && p2 && p3) || (p1 && p2 && p5) || (p1 && p2 && p6) ||
+		(p1 && p2 && p11)|| (p1 && p3 && p11)||
 		(p1 && p3 && p5) || (p1 && p3 && p6) || (p1 && p5 && p6) ||
 		(p2 && p3 && p5) || (p2 && p3 && p6) || (p2 && p3 && p11) ||
 		(p2 && p5 && p6) || (p2 && p5 && p11) || (p2 && p6 && p11) ||
@@ -338,16 +344,17 @@ func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) {
 func winningRepeat_C(v *Info, info *Info) bool {
 
 	if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount,info.bidamount) {
+		//避免抽错金额- title+name+winner
+		if ((v.projectcode!=""&&info.projectcode!=""&&v.projectcode==info.projectcode)||
+			(v.contractnumber!=""&&info.contractnumber!=""&&v.contractnumber==info.contractnumber)) &&
+			(v.winner!=""&&info.winner!=""&&v.winner==info.winner) {
+			return false
+		}
 		return true
 	}
-	//
 	if v.winner != "" && info.winner != "" && deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) {
 		return true
 	}
-	//原始地址...
-	//if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
-	//	return true
-	//}
 	return false
 }
 
@@ -415,17 +422,13 @@ func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
 		return false,reason
 	}
 	//首先判定是否为低质量数据    info目标数据
-	if info.agency==v.agency&&info.title!=""&&
-		info.title==v.title &&
-		info.projectname==""&&info.projectcode==""&&info.contractnumber==""&&info.buyer=="" {
+	if info.title!=""&&(info.agency==""||v.agency=="")&&
+		info.title==v.title&&info.projectcode==""&&info.contractnumber==""&&info.buyer=="" {
 		isValue:=0//五要素判断
-		if info.budget != 0 {//预算
+		if info.projectname != "" {//项目名称
 			isValue++
 		}
-		if info.bidopentime != 0{//开标时间
-			isValue++
-		}
-		if info.bidopenaddress!=""{//开标地点
+		if info.budget != 0 {//预算
 			isValue++
 		}
 		if info.winner != ""{//中标单位
@@ -435,7 +438,7 @@ func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
 			isValue++
 		}
 		if isValue==0 {
-			reason = reason + "---低质量-要素均为空-标题包含关系"
+			reason = reason + "---低质量-要素均为空-标题满足"
 			return true, reason
 		}else if isValue==1 {
 			isMeet := false
@@ -450,70 +453,24 @@ func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
 	return false,reason
 }
 
+
 //类别细节原因记录
 func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
-	if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
-		info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
-		info.subtype == "变更" || info.subtype == "其他" {
-		//招标结果
-		if info.budget != 0 && info.budget == v.budget{//预算
-			reason = reason + "---招标类:预算"
-			return true,reason
-		}
-		if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//开标时间
-			reason = reason + "---招标类:开标时间"
-			return true,reason
-		}
-		if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
-			reason = reason + "---招标类:开标地点"
-			return true,reason
-		}
-	} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
-		//中标结果
-		if v.winner != "" && info.winner == v.winner{//中标单位
-			reason = reason + "---中标类:中标单位"
-			return true,reason
-		}
-		if v.bidamount != 0 && info.bidamount == v.bidamount{//中标金额
-			reason = reason + "---中标类:中标金额"
-			return true,reason
-		}
-	} else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
-		//合同
-		if info.budget != 0 && info.budget == v.budget{//预算
-			reason = reason + "---合同类:预算"
-			return true,reason
-		}
-		if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//开标时间
-			reason = reason + "---合同类:开标时间"
-			return true,reason
-		}
-		if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
-			reason = reason + "---合同类:开标地点"
-			return true,reason
-		}
-		if v.winner != "" && info.winner == v.winner{//中标单位
-			reason = reason + "---合同类:中标单位"
-			return true,reason
-		}
-		if v.bidamount != 0 && info.bidamount == v.bidamount{//中标金额
-			reason = reason + "---合同类:中标金额"
-			return true,reason
-		}
-	} else {
-		//招标结果
-		if info.budget != 0 && info.budget == v.budget{//预算
-			reason = reason + "---类别空-招标类:预算"
-			return true,reason
-		}
-		if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//开标时间
-			reason = reason + "---类别空-招标类:开标时间"
-			return true,reason
-		}
-		if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
-			reason = reason + "---类别空-招标类:开标地点"
-			return true,reason
-		}
+	if info.projectname!="" && info.projectname == v.projectname{//项目名称
+		reason = reason + "---项目名称"
+		return true,reason
+	}
+	if info.budget != 0 && info.budget == v.budget{//预算
+		reason = reason + "---预算"
+		return true,reason
+	}
+	if v.winner != "" && info.winner == v.winner{//中标单位
+		reason = reason + "---中标单位"
+		return true,reason
+	}
+	if v.bidamount != 0 && info.bidamount == v.bidamount{//中标金额
+		reason = reason + "---中标金额"
+		return true,reason
 	}
 	return false,reason
 }