apple 5 anos atrás
pai
commit
0b8b98c9a0
3 arquivos alterados com 90 adições e 21 exclusões
  1. 1 1
      udpfilterdup/src/config.json
  2. 82 15
      udpfilterdup/src/datamap.go
  3. 7 5
      udpfilterdup/src/main.go

+ 1 - 1
udpfilterdup/src/config.json

@@ -27,7 +27,7 @@
     "timingPubScope": 720,
     "specialwords": "(重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研)",
     "specialtitle_0": "(包|标段|标包)[((]?[0-9a-zA-Z一二三四五六七八九十零123456789][))]?",
-    "specialtitle_1": "[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包|批)",
+    "specialtitle_1": "[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包|批|期)",
     "specialtitle_2": "项目[((][0-9a-zA-Z一二三四五六七八九十零123456789][))]",
     "beifen": "[((]?[0-9一二三四五六七八九十零123456789再][))]?[子分]?[次批标包]|重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研"
 }

+ 82 - 15
udpfilterdup/src/datamap.go

@@ -311,6 +311,10 @@ L:
 					//前置条件3 - 标题相关,均含有关键词
 					if ((info.titleSpecialWord && v.titleSpecialWord) || (info.specialWord && v.specialWord)) &&
 						len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && v.title != "" && info.title != "" {
+						//判断是否为 1、 重招组
+
+
+
 						if !(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
 							continue //无包含关系
 						}
@@ -513,6 +517,41 @@ func (d *datamap) GetLatelyFiveDay(t int64) []string  {
 ******** 以下为判重 ********
 **************************
  */
+ 
+ //完善判重数据监测-前置条件
+func convertArabicNumeralsAndLetters(data string) string {
+	newData :=data
+	res1, _ := regexp.Compile("[a-zA-Z]+");
+	if res1.MatchString(data) {
+		newData = res1.ReplaceAllStringFunc(data, strings.ToUpper);
+	}
+	res2, _ := regexp.Compile("[0-9]+");
+	if res2.MatchString(newData) {
+		arr1:=[]string {"0","1","2","3","4","5","6","7","8","9"}
+		arr2:=[]string {"零","一","二","三","四","五","六","七","八","九"}
+		for i:=0 ;i<len(arr1) ;i++  {
+			resTemp ,_:=regexp.Compile(arr1[i])
+			newData= resTemp.ReplaceAllString(newData, arr2[i]);
+		}
+	}
+	return newData
+}
+
// dealWithSpecialPhrases normalizes the long phrase "重新招标" to its short
// form "重招" in both inputs, so that two titles using different spellings of
// the same phrase compare equal. It returns the normalized str1 and str2, in
// that order; strings without the phrase are returned unchanged.
func dealWithSpecialPhrases(str1 string, str2 string) (string, string) {
	const (
		longForm  = "重新招标"
		shortForm = "重招"
	)
	// A literal substitution needs no regular expression; strings.Replace
	// with n = -1 replaces every occurrence.
	return strings.Replace(str1, longForm, shortForm, -1),
		strings.Replace(str2, longForm, shortForm, -1)
}
+
+
+ 
 
  //快速低质量数据判重
 func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
@@ -863,12 +902,12 @@ func tenderRepeat_C(v *Info, info *Info) bool {
 	if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
 		return true
 	}
-	if v.bidopentime != 0 && info.bidopentime != 0 && v.bidopentime != info.bidopentime {
-		return true
-	}
-	if v.bidopenaddress != "" && info.bidopenaddress != "" && v.bidopenaddress != info.bidopenaddress {
-		return true
-	}
+	//if v.bidopentime != 0 && info.bidopentime != 0 && v.bidopentime != info.bidopentime {
+	//	return true
+	//}
+	//if v.bidopenaddress != "" && info.bidopenaddress != "" && v.bidopenaddress != info.bidopenaddress {
+	//	return true
+	//}
 
 	return false
 }
@@ -972,13 +1011,13 @@ func winningRepeat_C(v *Info, info *Info) bool {
 	//if v.bidamount != 0 && info.bidamount != 0 && v.bidamount != info.bidamount {
 	//	return true
 	//}
-	if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount,info.bidamount) {
-		return true
-	}
-
-	if v.winner != "" && info.winner != "" && deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) {
-		return true
-	}
+	//if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount,info.bidamount) {
+	//	return true
+	//}
+	//
+	//if v.winner != "" && info.winner != "" && deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) {
+	//	return true
+	//}
 	//原始地址...
 	if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
 		return true
@@ -1024,13 +1063,23 @@ func contractRepeat_C(v *Info, info *Info) bool {
 	if winningRepeat_C(v, info) {
 		return true
 	}
+
+	//合同类 - 新增编号
+	if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
+		return true
+	}
+	if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
+		return true
+	}
+
 	return false
 }
 
 //再次金额判断
 func againRepeat(v *Info, info *Info) bool {
 	//相同采购单位下
-	if info.buyer != "" && v.buyer == info.buyer {
+	//if info.buyer != "" && v.buyer == info.buyer {
+	if v.buyer == info.buyer {
 		if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
 			info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
 			info.subtype == "其他" || info.subtype == "变更" {
@@ -1077,4 +1126,22 @@ func isBidWinningAmount(f1 float64 ,f2 float64) bool {
 		return false
 	}
 	return true
-}
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 7 - 5
udpfilterdup/src/main.go

@@ -46,12 +46,12 @@ var (
 	TimingTask     bool                              //是否定时任务
 	timingSpanDay  int64                             //时间跨度
 	timingPubScope int64                             //发布时间周期
-	sid,eid,lastid       string                      //测试人员判重使用
-	IdType         bool    //默认object类型
+	sid,eid,lastid string                     		 //测试人员判重使用
+	IdType         bool   							 //默认object类型
 )
 
 func init() {
-	//5ea9a4800000000000000000
+	return
 	flag.StringVar(&lastid, "id", "", "最后加载id") //以小于等于此id开始加载最近几天的数据
 	flag.StringVar(&sid, "sid", "", "开始id")
 	flag.StringVar(&eid, "eid", "", "结束id")
@@ -105,7 +105,7 @@ func init() {
 	log.Printf("new站点加载用时:%d秒,%d个\n", int(time.Now().Unix())-start, len(SiteMap))
 }
 
-func main() {
+func mainT() {
 	go checkMapJob()
 	updport := Sysconfig["udpport"].(string)
 	udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
@@ -118,8 +118,10 @@ func main() {
 	time.Sleep(99999 * time.Hour)
 }
 
+
+
 //测试组人员使用
-func mainT() {
+func main() {
 
 	if TimingTask {
 		log.Println("定时任务测试开始")