apple il y a 4 ans
Parent
commit
6997d29cff

+ 11 - 1
udpfilterdup/src/dataMethod.go

@@ -8,7 +8,6 @@ import (
 )
 
 
-
 //完善判重数据检测-前置条件
 func convertArabicNumeralsAndLetters(data string) string {
 	newData :=data
@@ -129,6 +128,17 @@ func isBidWinningAmount(f1 float64 ,f2 float64) bool {
 }
 
 
+// isTimeIntervalPeriod reports whether i1 and i2 differ by less than 172800 (48 hours if the values are Unix seconds — confirm with callers).
+func isTimeIntervalPeriod(i1 int64 ,i2 int64) bool {
+
+	if math.Abs(float64(i1-i2)) < 172800.0 {
+		return true
+	}else {
+		return false // the gap is 48 hours or more
+	}
+}
+
+
 //开标时间区间为一天
 func isBidopentimeInterval(i1 int64 ,i2 int64) bool {
 	if i1==0||i2==0 {

+ 21 - 7
udpfilterdup/src/dataMethodHeavy.go

@@ -66,7 +66,18 @@ func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
 //判重方法2
 func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
 	isMeet := false
-	if v.agency == info.agency && v.agency != "" && info.agency != "" {
+	isAgency :=false
+	//招标类-代理机构不同-广泛前后缀比较
+	if v.agency != info.agency && v.agency != "" && info.agency != "" {
+		//新增一层判断
+		if strings.Contains(v.agency, info.agency) || strings.Contains(info.agency, v.agency) {
+			isAgency = true
+		}else {
+			return false, reason
+		}
+	}
+
+	if (v.agency == info.agency && v.agency != "" && info.agency != "")|| isAgency {
 		if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
 			info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
 			info.subtype == "变更" || info.subtype == "其他" {
@@ -122,11 +133,7 @@ func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
 		}
 	}
 
-	//不同
-	if v.agency != info.agency && v.agency != "" && info.agency != "" {
-		return false, reason
-	}
-	//机构最少一个为空
+	// At least one of the two agencies is empty
 	if v.agency == "" || info.agency == "" {
 		var repeat = false
 		if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
@@ -409,9 +416,16 @@ func contractRepeat_C(v *Info, info *Info) bool {
 
 //快速低质量数据判重
 func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
-	if !isTheSameDay(v.publishtime,info.publishtime) {
+	//if !isTheSameDay(v.publishtime,info.publishtime) {
+	//	return false,reason
+	//}
+
+	// Publish times must be less than 48 hours apart (isTimeIntervalPeriod compares against 172800)
+	if !isTimeIntervalPeriod(v.publishtime,info.publishtime) {
 		return false,reason
 	}
+
+
 	//首先判定是否为低质量数据    info目标数据
 	if info.title!=""&&(info.agency==""||v.agency=="")&&
 		info.title==v.title&&info.projectcode==""&&info.contractnumber==""&&info.buyer=="" {

+ 14 - 0
udpfilterdup/src/datamap.go

@@ -198,6 +198,12 @@ func NewDatamap(days int, lastid string) *datamap {
 //数据构建
 func NewInfo(tmp map[string]interface{}) *Info {
 	subtype := qutil.ObjToString(tmp["subtype"])
+	if subtype=="招标"||subtype=="邀标"||subtype=="询价"||
+		subtype=="竞谈"||subtype=="竞价" {
+		subtype = "招标"
+	}
+
+
 	area := qutil.ObjToString(tmp["area"])
 	if area == "A" {
 		area = "全国"
@@ -356,6 +362,14 @@ L:
 						}else {
 							if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
 								//无包含关系-即不相等
+								////特殊 标段X 类直接过滤
+								//if info.titleSpecialWord && v.titleSpecialWord {
+								//	continue
+								//}else {
+								//	if againRepeat(v, info) {
+								//		continue
+								//	}
+								//}
 								if againRepeat(v, info) {
 									continue
 								}