|
@@ -257,13 +257,12 @@ L:
|
|
|
data := d.data[k]
|
|
|
d.lock.Unlock()
|
|
|
if len(data) > 0 { //对比v 找到同类型,同省或全国的数据作对比
|
|
|
- //log.Println(info.area,info.subtype,k)
|
|
|
for _, v := range data {
|
|
|
reason = ""
|
|
|
if v.id == info.id { //正常重复
|
|
|
return false, v, ""
|
|
|
}
|
|
|
- if info.site != "" {
|
|
|
+ if info.site != "" {//站点临时赋值
|
|
|
sitelock.Lock()
|
|
|
dict := SiteMap[info.site]
|
|
|
sitelock.Unlock()
|
|
@@ -290,34 +289,36 @@ L:
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ specialNum:= dealWithSpecialWordNumber(info,v)
|
|
|
//前置条件2 - 标题相关,有且一个关键词
|
|
|
- if ((info.titleSpecialWord && !v.titleSpecialWord) || (info.specialWord && !v.specialWord)) &&
|
|
|
- info.title != v.title && v.title != "" && info.title != "" {
|
|
|
- continue
|
|
|
+ if specialNum==1 {
|
|
|
+ if info.title != v.title && v.title != "" && info.title != "" {
|
|
|
+ continue
|
|
|
+ }
|
|
|
}
|
|
|
-
|
|
|
//前置条件3 - 标题相关,均含有关键词
|
|
|
- if ((info.titleSpecialWord && v.titleSpecialWord) || (info.specialWord && v.specialWord)) &&
|
|
|
- len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && v.title != "" && info.title != "" {
|
|
|
-
|
|
|
- letter1,letter2:=v.title,info.title
|
|
|
- res, _ := regexp.Compile("[0-9a-zA-Z]+");
|
|
|
- if res.MatchString(letter1)||res.MatchString(letter2) {
|
|
|
- letter1=convertArabicNumeralsAndLetters(letter1)
|
|
|
- letter2=convertArabicNumeralsAndLetters(letter2)
|
|
|
- }
|
|
|
- if strings.Contains(letter1,"重新招标")|| strings.Contains(letter2,"重新招标"){
|
|
|
- letter1,letter2=dealWithSpecialPhrases(letter1,letter2)
|
|
|
- }
|
|
|
- if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
|
|
|
- continue
|
|
|
- }else {
|
|
|
- reason = reason + "标题关键词且包含关系"
|
|
|
- if !againRepeat(v, info) {//继续二级金额判断
|
|
|
- b = true
|
|
|
- source = v
|
|
|
- reasons = reason
|
|
|
- break L
|
|
|
+ if specialNum==2 {
|
|
|
+ if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
|
|
|
+ v.title != "" && info.title != "" {
|
|
|
+ letter1,letter2:=v.title,info.title
|
|
|
+ res, _ := regexp.Compile("[0-9a-zA-Z]+");
|
|
|
+ if res.MatchString(letter1)||res.MatchString(letter2) {
|
|
|
+ letter1=convertArabicNumeralsAndLetters(letter1)
|
|
|
+ letter2=convertArabicNumeralsAndLetters(letter2)
|
|
|
+ }
|
|
|
+ if strings.Contains(letter1,"重新招标")|| strings.Contains(letter2,"重新招标"){
|
|
|
+ letter1,letter2=dealWithSpecialPhrases(letter1,letter2)
|
|
|
+ }
|
|
|
+ if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
|
|
|
+ continue
|
|
|
+ }else {
|
|
|
+ reason = reason + "标题关键词且包含关系"
|
|
|
+ if !againRepeat(v, info) {//继续二级金额判断
|
|
|
+ b = true
|
|
|
+ source = v
|
|
|
+ reasons = reason
|
|
|
+ break L
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -470,7 +471,6 @@ func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
|
|
|
isAreaExist = true
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
if !isAreaExist {
|
|
|
areaArr := d.areakeys
|
|
|
areaArr = append(areaArr,newData.area)
|
|
@@ -553,7 +553,17 @@ func dealWithSpecialPhrases(str1 string,str2 string) (string,string) {
|
|
|
}
|
|
|
return newStr1,newStr2
|
|
|
}
|
|
|
-
|
|
|
+// dealWithSpecialWordNumber counts how many of the two records, info and v, carry a special-keyword flag (titleSpecialWord or specialWord set); the result is 0, 1 or 2.
|
|
|
+func dealWithSpecialWordNumber(info *Info, v*Info) int { // both pointers are dereferenced below, so neither may be nil
|
|
|
+ okNum:=0
|
|
|
+ if info.titleSpecialWord || info.specialWord { // either flag on info counts once, even when both are set
|
|
|
+ okNum++
|
|
|
+ }
|
|
|
+ if v.titleSpecialWord || v.specialWord { // same rule for the record being compared against
|
|
|
+ okNum++
|
|
|
+ }
|
|
|
+ return okNum
|
|
|
+}
|
|
|
|
|
|
|
|
|
|