package main import ( "strings" ) // 判重方法1 func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) { isMeet := false if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" || info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" || info.subtype == "变更" || info.subtype == "其他" { //招标结果 if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet { if tenderRepeat_C(v, info) { return false, reason } else { reason = reason + "---招标类" return true, reason } } else { return false, reason } } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" { //中标结果 if isMeet, reason = winningRepeat_A(v, info, reason); isMeet { if winningRepeat_C(v, info) { return false, reason } else { reason = reason + "---中标类" return true, reason } } else { return false, reason } } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" { //合同 if isMeet, reason = contractRepeat_A(v, info, reason); isMeet { if contractRepeat_C(v, info) { return false, reason } else { reason = reason + "---合同类" return true, reason } } else { return false, reason } } else { //招标结果 if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet { if tenderRepeat_C(v, info) { return false, reason } else { reason = reason + "---类别空-招标类" return true, reason } } else { return false, reason } } return false, reason } // 判重方法2 func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) { isMeet := false isAgency := false //招标类-代理机构不同-广泛前后缀比较 if v.agency != info.agency && v.agency != "" && info.agency != "" { //新增一层判断 if strings.Contains(v.agency, info.agency) || strings.Contains(info.agency, v.agency) { isAgency = true } else { return false, reason } } if (v.agency == info.agency && v.agency != "" && info.agency != "") || isAgency { if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" || info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" || info.subtype == "变更" || info.subtype == "其他" { //招标结果 if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet { if tenderRepeat_C(v, info) { //有不同 return false, reason } else { reason = reason + "---招标类" return true, reason } } else { return false, reason } } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" { //中标结果 if isMeet, reason = winningRepeat_B(v, info, reason); isMeet { if winningRepeat_C(v, info) { //有不同 return false, reason } else { reason = reason + "---中标类" return true, reason } } else { return false, reason } } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" { //合同 if isMeet, reason = contractRepeat_B(v, info, reason); isMeet { if contractRepeat_C(v, info) { //有不同 return false, reason } else { reason = reason + "---合同类" return true, reason } } else { return false, reason } } else { //招标结果 if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet { if tenderRepeat_C(v, info) { //有不同 return false, reason } else { reason = reason + "---类别空-招标类" return true, reason } } else { return false, reason } } } //机构最2少一个为空 if v.agency == "" || info.agency == "" { var repeat = false if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat { reason = reason + "---机构最少一个空" return true, reason } else { return false, reason } } return false, reason } // 招标_A func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) { var ss string p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false if v.projectname != "" && v.projectname == info.projectname { ss = ss + "p1-名称-" p1 = true } if v.buyer != "" && v.buyer == info.buyer { ss = ss + "p2-单位-" p2 = true } if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) || (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) { ss = ss + "p3-编号组-" p3 = true } if v.budget != 0 && v.budget == info.budget { ss = ss + "p4-预算-" p4 = true } if v.bidopentime != 0 && v.bidopentime == info.bidopentime { ss = ss + "p9-开标时间相同-" p9 = true } if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress { ss = ss + "p10-开标地点-" p10 = true } if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 { if strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title) { ss = ss + "p11-标题-" p11 = true } else { if v.c_title != "" && info.c_title != "" && (strings.Contains(v.c_title, info.c_title) || strings.Contains(info.c_title, v.c_title)) { ss = ss + "p11-标题-" p11 = true } } } if info.subtype != "" && (p1 && p3 && p11) { reason = reason + "满足招标A,3要素组合-" + ss + "," return true, reason } if (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) || (p1 && p2 && p10) || (p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) || (p1 && p3 && p4) || (p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) || (p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) || (p2 && p4 && p9) || (p2 && p4 && p10) || (p2 && p4 && p11) || (p3 && p4 && p9) || (p3 && p4 && p10) || (p3 && p4 && p11) || (p4 && p9 && p10) || (p4 && p9 && p11) || (p9 && p10 && p11) { reason = reason + "满足招标A,3要素组合-" + ss + "," return true, reason } return false, reason } // 招标_B func tenderRepeat_B(v *Info, info *Info, reason string) (bool, string) { m, n := 0, 0 if v.projectname != "" && v.projectname == info.projectname { m++ n++ } if v.buyer != "" && v.buyer == info.buyer { m++ } if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) || (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) { m++ } if v.budget != 0 && v.budget == info.budget { m++ } if v.bidopentime != 0 && v.bidopentime == info.bidopentime { m++ } //if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress { // m++ //} if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) { m++ n++ } if m >= 2 { if n == 2 && m == 2 { return false, reason } else { reason = reason + "满足招标B,六选二," return true, reason } } return false, reason } // 招标_C func tenderRepeat_C(v *Info, info *Info) bool { if v.budget != 0 && info.budget != 0 && v.budget != info.budget { return true } if v.bidopentime != 0 && info.bidopentime != 0 && isBidopentimeInterval(info.bidopentime, v.bidopentime) { return true } return false } // 中标_A func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) { var ss string p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false if v.projectname != "" && v.projectname == info.projectname { ss = ss + "p1-项目名称-" p1 = true } if v.buyer != "" && v.buyer == info.buyer { ss = ss + "p2-单位-" p2 = true } if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) || (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) { ss = ss + "p3-编号组--" p3 = true } if v.bidamount != 0 && !isBidWinningAmount(v.bidamount, info.bidamount) { ss = ss + "p5-中标金-" p5 = true } if v.winner != "" && v.winner == info.winner { ss = ss + "p6-中标人-" p6 = true } if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) { ss = ss + "p11-标题-" p11 = true } if (p1 && p2 && p3) || (p1 && p2 && p5) || (p1 && p2 && p6) || (p1 && p2 && p11) || (p1 && p3 && p11) || (p1 && p3 && p5) || (p1 && p3 && p6) || (p1 && p5 && p6) || (p2 && p3 && p5) || (p2 && p3 && p6) || (p2 && p3 && p11) || (p2 && p5 && p6) || (p2 && p5 && p11) || (p2 && p6 && p11) || (p3 && p5 && p6) || (p3 && p5 && p11) || (p3 && p6 && p11) || (p5 && p6 && p11) { reason = reason + "满足中标A,3要素组合-" + ss + "," return true, reason } return false, reason } // 中标_B func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) { m, n := 0, 0 if v.projectname != "" && v.projectname == info.projectname { m++ n++ } if v.buyer != "" && v.buyer == info.buyer { m++ } if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) || (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) { m++ } if v.bidamount != 0 && !isBidWinningAmount(v.bidamount, info.bidamount) { m++ } if v.winner != "" && v.winner == info.winner { m++ } if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) { m++ n++ } if m >= 2 { if n == 2 && m == 2 { return false, reason } else { reason = reason + "满足中标B.六选二," return true, reason } } return false, reason } // 中标_C func winningRepeat_C(v *Info, info *Info) bool { if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount, info.bidamount) { //避免抽错金额- if ((v.projectcode != "" && info.projectcode != "" && v.projectcode == info.projectcode) || (v.contractnumber != "" && info.contractnumber != "" && v.contractnumber == info.contractnumber)) && (v.winner != "" && info.winner != "" && v.winner == info.winner) { return false } return true } if v.winner != "" && info.winner != "" && v.winner != info.winner { return true } return false } // 合同_A func contractRepeat_A(v *Info, info *Info, reason string) (bool, string) { isMeet_1 := false if isMeet_1, reason = tenderRepeat_A(v, info, reason); isMeet_1 { return true, reason } isMeet_2 := false if isMeet_2, reason = winningRepeat_A(v, info, reason); isMeet_2 { return true, reason } return false, reason } // 合同_B func contractRepeat_B(v *Info, info *Info, reason string) (bool, string) { isMeet_1 := false if isMeet_1, reason = tenderRepeat_B(v, info, reason); isMeet_1 { return true, reason } isMeet_2 := false if isMeet_2, reason = winningRepeat_B(v, info, reason); isMeet_2 { return true, reason } return false, reason } // 合同_C func contractRepeat_C(v *Info, info *Info) bool { if tenderRepeat_C(v, info) { return true } if winningRepeat_C(v, info) { return true } //合同类 - 新增编号 if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber { return true } if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode { return true } return false } // 是否相似 func isTheSimilarName(name1 string, name2 string) bool { if strings.Contains(name1, name2) || strings.Contains(name2, name1) { return true } return false } // 快速低质量数据判重 func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) { //区间间隔24小时 if !isTimeIntervalPeriod(v.publishtime, info.publishtime) { return false, reason } //首先判定是否为低质量数据 info目标数据 if info.title != "" && (info.agency == "" || v.agency == "") && (info.title == v.title) && (info.projectcode == "" || info.projectcode == v.projectcode) && info.contractnumber == "" && info.buyer == "" { isValue := 0 //五要素判断 if info.projectname != "" { //项目名称 isValue++ } if info.budget != 0 { //预算 isValue++ } if info.winner != "" { //中标单位 isValue++ } if info.bidamount != 0 { //中标金额 isValue++ } if isValue == 0 { reason = reason + "---低质量-要素均为空-标题满足" return true, reason } else if isValue == 1 { isMeet := false if isMeet, reason = judgeLowQualityData(v, info, reason); isMeet { reason = reason + "---低质量-有且一个要素组合" return true, reason } } else if isValue == 2 { if info.subtype == "采购意向" { //特殊 if info.projectname != "" && info.projectname == v.projectname && info.budget != 0 && info.budget == v.budget && info.city != "" && info.city == v.city { reason = reason + "---采购意向~同城~预算~名称均一致" return true, reason } } } else { } } return false, reason } // 类别细节原因记录 func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) { if info.projectname != "" && isTheSimilarName(info.projectname, v.projectname) { reason = reason + "---项目名称" return true, reason } if info.budget != 0 && info.budget == v.budget { //预算 reason = reason + "---预算" return true, reason } if v.winner != "" && info.winner == v.winner { //中标单位 reason = reason + "---中标单位" return true, reason } if v.bidamount != 0 && info.bidamount == v.bidamount { //中标金额 reason = reason + "---中标金额" return true, reason } return false, reason }