|
@@ -263,90 +263,90 @@ L:
|
|
|
if v.id == info.id { //正常重复
|
|
|
return false, v, ""
|
|
|
}
|
|
|
- //if v.id == "5c761a4fa5cb26b9b73d9512" &&info.id=="5c767bd1a5cb26b9b7a61597" {
|
|
|
- // log.Println("测试数据")
|
|
|
- //}
|
|
|
-
|
|
|
- if info.subtype == v.subtype {
|
|
|
- if info.site != "" {
|
|
|
- sitelock.Lock()
|
|
|
- dict := SiteMap[info.site]
|
|
|
- sitelock.Unlock()
|
|
|
- if dict != nil {
|
|
|
- if info.area == "全国" && dict["area"] != "" {
|
|
|
- info.is_site = true
|
|
|
- info.area = qutil.ObjToString(dict["area"])
|
|
|
- info.city = qutil.ObjToString(dict["city"])
|
|
|
- } else {
|
|
|
- if info.city == "" && dict["city"] != "" {
|
|
|
- info.is_site = true
|
|
|
- info.area = qutil.ObjToString(dict["area"])
|
|
|
- info.city = qutil.ObjToString(dict["city"])
|
|
|
- }
|
|
|
- }
|
|
|
+ if info.site != "" {
|
|
|
+ sitelock.Lock()
|
|
|
+ dict := SiteMap[info.site]
|
|
|
+ sitelock.Unlock()
|
|
|
+ if dict != nil {
|
|
|
+ if (info.area == "全国" && dict["area"] != "")||
|
|
|
+ (info.city == "" && dict["city"] != ""){
|
|
|
+ info.is_site = true
|
|
|
+ info.area = qutil.ObjToString(dict["area"])
|
|
|
+ info.city = qutil.ObjToString(dict["city"])
|
|
|
}
|
|
|
}
|
|
|
- //前置条件1 - 站点相关
|
|
|
- if info.site != "" && info.site == v.site {
|
|
|
- if info.href != "" && info.href == v.href {
|
|
|
- reason = "href相同"
|
|
|
- b = true
|
|
|
- source = v
|
|
|
- reasons = reason
|
|
|
- break L
|
|
|
- }
|
|
|
- if info.href != "" && info.href != v.href {
|
|
|
- reason = "href不同-"
|
|
|
- }
|
|
|
+ }
|
|
|
+ //前置条件1 - 站点相关
|
|
|
+ if info.site != "" && info.site == v.site {
|
|
|
+ if info.href != "" && info.href == v.href {
|
|
|
+ reason = "href相同"
|
|
|
+ b = true
|
|
|
+ source = v
|
|
|
+ reasons = reason
|
|
|
+ break L
|
|
|
}
|
|
|
-
|
|
|
- //前置条件2 - 标题相关,有且一个关键词
|
|
|
- if ((info.titleSpecialWord && !v.titleSpecialWord) || (info.specialWord && !v.specialWord)) &&
|
|
|
- info.title != v.title && v.title != "" && info.title != "" {
|
|
|
- continue
|
|
|
+ if info.href != "" && info.href != v.href {
|
|
|
+ reason = "href不同-"
|
|
|
}
|
|
|
+ }
|
|
|
|
|
|
- //前置条件3 - 标题相关,均含有关键词
|
|
|
- if ((info.titleSpecialWord && v.titleSpecialWord) || (info.specialWord && v.specialWord)) &&
|
|
|
- len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && v.title != "" && info.title != "" {
|
|
|
+ //前置条件2 - 标题相关,有且一个关键词
|
|
|
+ if ((info.titleSpecialWord && !v.titleSpecialWord) || (info.specialWord && !v.specialWord)) &&
|
|
|
+ info.title != v.title && v.title != "" && info.title != "" {
|
|
|
+ continue
|
|
|
+ }
|
|
|
|
|
|
- letter1,letter2:=v.title,info.title
|
|
|
- res, _ := regexp.Compile("[0-9a-zA-Z]+");
|
|
|
- if res.MatchString(letter1)||res.MatchString(letter2) {
|
|
|
- letter1=convertArabicNumeralsAndLetters(letter1)
|
|
|
- letter2=convertArabicNumeralsAndLetters(letter2)
|
|
|
- }
|
|
|
- if strings.Contains(letter1,"重新招标")|| strings.Contains(letter2,"重新招标"){
|
|
|
- letter1,letter2=dealWithSpecialPhrases(letter1,letter2)
|
|
|
- }
|
|
|
- if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
|
|
|
- continue
|
|
|
- }else {
|
|
|
- reason = reason + "标题关键词且包含关系"
|
|
|
- if !againRepeat(v, info) {//继续二级金额判断
|
|
|
- b = true
|
|
|
- source = v
|
|
|
- reasons = reason
|
|
|
- break L
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+ //前置条件3 - 标题相关,均含有关键词
|
|
|
+ if ((info.titleSpecialWord && v.titleSpecialWord) || (info.specialWord && v.specialWord)) &&
|
|
|
+ len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && v.title != "" && info.title != "" {
|
|
|
|
|
|
- //新增快速数据过少判重
|
|
|
- if LowHeavy {
|
|
|
- repeat := false
|
|
|
- if repeat, reason = fastLowQualityHeavy(v, info, reason); repeat {
|
|
|
+ letter1,letter2:=v.title,info.title
|
|
|
+ res, _ := regexp.Compile("[0-9a-zA-Z]+");
|
|
|
+ if res.MatchString(letter1)||res.MatchString(letter2) {
|
|
|
+ letter1=convertArabicNumeralsAndLetters(letter1)
|
|
|
+ letter2=convertArabicNumeralsAndLetters(letter2)
|
|
|
+ }
|
|
|
+ if strings.Contains(letter1,"重新招标")|| strings.Contains(letter2,"重新招标"){
|
|
|
+ letter1,letter2=dealWithSpecialPhrases(letter1,letter2)
|
|
|
+ }
|
|
|
+ if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
|
|
|
+ continue
|
|
|
+ }else {
|
|
|
+ reason = reason + "标题关键词且包含关系"
|
|
|
+ if !againRepeat(v, info) {//继续二级金额判断
|
|
|
b = true
|
|
|
source = v
|
|
|
reasons = reason
|
|
|
break L
|
|
|
}
|
|
|
}
|
|
|
+ }
|
|
|
|
|
|
+ //新增快速数据过少判重
|
|
|
+ if LowHeavy {
|
|
|
+ repeat := false
|
|
|
+ if repeat, reason = fastLowQualityHeavy(v, info, reason); repeat {
|
|
|
+ b = true
|
|
|
+ source = v
|
|
|
+ reasons = reason
|
|
|
+ break L
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- //代理机构相同-非空相等
|
|
|
- if v.agency != "" && info.agency != "" && v.agency == info.agency {
|
|
|
- reason = reason + "同机构-"
|
|
|
+ //代理机构相同-非空相等
|
|
|
+ if v.agency != "" && info.agency != "" && v.agency == info.agency {
|
|
|
+ reason = reason + "同机构-"
|
|
|
+ repeat := false
|
|
|
+ if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
|
|
|
+ b = true
|
|
|
+ source = v
|
|
|
+ reasons = reason
|
|
|
+ break L
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ reason = reason + "非同机构-"
|
|
|
+ if info.city != "" && info.city == v.city {
|
|
|
+ reason = reason + "同城-"
|
|
|
repeat := false
|
|
|
if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
|
|
|
b = true
|
|
@@ -355,25 +355,13 @@ L:
|
|
|
break L
|
|
|
}
|
|
|
} else {
|
|
|
- reason = reason + "非同机构-"
|
|
|
- if info.city != "" && info.city == v.city {
|
|
|
- reason = reason + "同城-"
|
|
|
- repeat := false
|
|
|
- if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
|
|
|
- b = true
|
|
|
- source = v
|
|
|
- reasons = reason
|
|
|
- break L
|
|
|
- }
|
|
|
- } else {
|
|
|
- reason = reason + "不同城-"
|
|
|
- repeat := false
|
|
|
- if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
|
|
|
- b = true
|
|
|
- source = v
|
|
|
- reasons = reason
|
|
|
- break L
|
|
|
- }
|
|
|
+ reason = reason + "不同城-"
|
|
|
+ repeat := false
|
|
|
+ if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
|
|
|
+ b = true
|
|
|
+ source = v
|
|
|
+ reasons = reason
|
|
|
+ break L
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -474,6 +462,21 @@ func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
|
|
|
d.data[k] = data
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ //添加省
|
|
|
+ isAreaExist :=false
|
|
|
+ for _,v:= range d.areakeys {
|
|
|
+ if v==newData.area {
|
|
|
+ isAreaExist = true
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if !isAreaExist {
|
|
|
+ areaArr := d.areakeys
|
|
|
+ areaArr = append(areaArr,newData.area)
|
|
|
+ d.areakeys = areaArr
|
|
|
+ }
|
|
|
+
|
|
|
d.lock.Unlock()
|
|
|
}
|
|
|
|
|
@@ -804,38 +807,38 @@ func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
|
|
|
var ss string
|
|
|
p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false
|
|
|
if v.projectname != "" && v.projectname == info.projectname {
|
|
|
- ss = ss + "p1(名称)-"
|
|
|
+ ss = ss + "p1-名称-"
|
|
|
p1 = true
|
|
|
}
|
|
|
if v.buyer != "" && v.buyer == info.buyer {
|
|
|
- ss = ss + "p2(单位)-"
|
|
|
+ ss = ss + "p2-单位-"
|
|
|
p2 = true
|
|
|
}
|
|
|
if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
|
|
|
(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
|
|
|
- ss = ss + "p3(编号组)-"
|
|
|
+ ss = ss + "p3-编号组-"
|
|
|
p3 = true
|
|
|
}
|
|
|
if v.budget != 0 && v.budget == info.budget {
|
|
|
- ss = ss + "p4(预算)-"
|
|
|
+ ss = ss + "p4-预算-"
|
|
|
p4 = true
|
|
|
}
|
|
|
if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
|
|
|
- ss = ss + "p9(开标时间)-"
|
|
|
+ ss = ss + "p9-开标时间-"
|
|
|
p9 = true
|
|
|
}
|
|
|
if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
|
|
|
- ss = ss + "p10(开标地点)-"
|
|
|
+ ss = ss + "p10-开标地点-"
|
|
|
p10 = true
|
|
|
}
|
|
|
if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
|
|
|
(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
|
|
|
- ss = ss + "p11(标题)-"
|
|
|
+ ss = ss + "p11-标题-"
|
|
|
p11 = true
|
|
|
}
|
|
|
|
|
|
- if (p1 && p2 && p4) || (p1 && p2 && p9) ||
|
|
|
- (p1 && p2 && p10) || (p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) ||
|
|
|
+ if (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) || (p1 && p2 && p10) ||
|
|
|
+ (p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) ||
|
|
|
(p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) ||
|
|
|
(p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) ||
|
|
|
(p2 && p4 && p9) || (p2 && p4 && p10) || (p2 && p4 && p11) ||
|
|
@@ -914,31 +917,31 @@ func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
|
|
|
var ss string
|
|
|
p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false
|
|
|
if v.projectname != "" && v.projectname == info.projectname {
|
|
|
- ss = ss + "p1(项目名称)-"
|
|
|
+ ss = ss + "p1-项目名称-"
|
|
|
p1 = true
|
|
|
}
|
|
|
if v.buyer != "" && v.buyer == info.buyer {
|
|
|
- ss = ss + "p2(单位)-"
|
|
|
+ ss = ss + "p2-单位-"
|
|
|
p2 = true
|
|
|
}
|
|
|
if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
|
|
|
(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
|
|
|
- ss = ss + "p3(编号组)-"
|
|
|
+ ss = ss + "p3-编号组--"
|
|
|
p3 = true
|
|
|
}
|
|
|
if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
|
|
|
- ss = ss + "p5(中标金)-"
|
|
|
+ ss = ss + "p5-中标金-"
|
|
|
p5 = true
|
|
|
}
|
|
|
if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
|
|
|
- ss = ss + "p6(中标人)-"
|
|
|
+ ss = ss + "p6-中标人-"
|
|
|
p6 = true
|
|
|
}
|
|
|
|
|
|
|
|
|
if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
|
|
|
(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
|
|
|
- ss = ss + "p11(标题)-"
|
|
|
+ ss = ss + "p11-标题-"
|
|
|
p11 = true
|
|
|
}
|
|
|
|