apple 5 rokov pred
rodič
commit
5447f63cee
1 zmenil súbory, kde vykonal 39 pridanie a 29 odobranie
  1. 39 29
      udpfilterdup/src/datamap.go

+ 39 - 29
udpfilterdup/src/datamap.go

@@ -257,13 +257,12 @@ L:
 		data := d.data[k]
 		d.lock.Unlock()
 		if len(data) > 0 { //对比v   找到同类型,同省或全国的数据作对比
-			//log.Println(info.area,info.subtype,k)
 			for _, v := range data {
 				reason = ""
 				if v.id == info.id { //正常重复
 					return false, v, ""
 				}
-				if info.site != "" {
+				if info.site != "" {//站点临时赋值
 					sitelock.Lock()
 					dict := SiteMap[info.site]
 					sitelock.Unlock()
@@ -290,34 +289,36 @@ L:
 					}
 				}
 
+				specialNum:= dealWithSpecialWordNumber(info,v)
 				//前置条件2 - 标题相关,有且一个关键词
-				if ((info.titleSpecialWord && !v.titleSpecialWord) || (info.specialWord && !v.specialWord)) &&
-					info.title != v.title && v.title != "" && info.title != "" {
-					continue
+				if specialNum==1 {
+					if info.title != v.title && v.title != "" && info.title != "" {
+						continue
+					}
 				}
-
 				//前置条件3 - 标题相关,均含有关键词
-				if ((info.titleSpecialWord && v.titleSpecialWord) || (info.specialWord && v.specialWord)) &&
-					len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && v.title != "" && info.title != "" {
-
-					letter1,letter2:=v.title,info.title
-					res, _ := regexp.Compile("[0-9a-zA-Z]+");
-					if res.MatchString(letter1)||res.MatchString(letter2) {
-						letter1=convertArabicNumeralsAndLetters(letter1)
-						letter2=convertArabicNumeralsAndLetters(letter2)
-					}
-					if strings.Contains(letter1,"重新招标")|| strings.Contains(letter2,"重新招标"){
-						letter1,letter2=dealWithSpecialPhrases(letter1,letter2)
-					}
-					if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
-						continue
-					}else {
-						reason = reason + "标题关键词且包含关系"
-						if !againRepeat(v, info) {//继续二级金额判断
-							b = true
-							source = v
-							reasons = reason
-							break L
+				if specialNum==2 {
+					if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
+						v.title != "" && info.title != "" {
+						letter1,letter2:=v.title,info.title
+						res, _ := regexp.Compile("[0-9a-zA-Z]+");
+						if res.MatchString(letter1)||res.MatchString(letter2) {
+							letter1=convertArabicNumeralsAndLetters(letter1)
+							letter2=convertArabicNumeralsAndLetters(letter2)
+						}
+						if strings.Contains(letter1,"重新招标")|| strings.Contains(letter2,"重新招标"){
+							letter1,letter2=dealWithSpecialPhrases(letter1,letter2)
+						}
+						if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
+							continue
+						}else {
+							reason = reason + "标题关键词且包含关系"
+							if !againRepeat(v, info) {//继续二级金额判断
+								b = true
+								source = v
+								reasons = reason
+								break L
+							}
 						}
 					}
 				}
@@ -470,7 +471,6 @@ func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
 			isAreaExist = true
 		}
 	}
-
 	if !isAreaExist {
 		areaArr := d.areakeys
 		areaArr = append(areaArr,newData.area)
@@ -553,7 +553,17 @@ func dealWithSpecialPhrases(str1 string,str2 string) (string,string) {
 	}
 	return newStr1,newStr2
 }
-
+// dealWithSpecialWordNumber returns how many of the two records (info and v) have titleSpecialWord or specialWord set: 0, 1 or 2.
+func dealWithSpecialWordNumber(info *Info,  v*Info) int {
+	okNum:=0
+	if  info.titleSpecialWord || info.specialWord { // info counts once even when both of its flags are set
+		okNum++
+	}
+	if  v.titleSpecialWord || v.specialWord { // v counts once even when both of its flags are set
+		okNum++
+	}
+	return okNum
+}