123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492 |
- package main
- import (
- "strings"
- )
- // 判重方法1
- func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
- isMeet := false
- if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
- info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
- info.subtype == "变更" || info.subtype == "其他" {
- //招标结果
- if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
- if tenderRepeat_C(v, info) {
- return false, reason
- } else {
- reason = reason + "---招标类"
- return true, reason
- }
- } else {
- return false, reason
- }
- } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
- //中标结果
- if isMeet, reason = winningRepeat_A(v, info, reason); isMeet {
- if winningRepeat_C(v, info) {
- return false, reason
- } else {
- reason = reason + "---中标类"
- return true, reason
- }
- } else {
- return false, reason
- }
- } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
- //合同
- if isMeet, reason = contractRepeat_A(v, info, reason); isMeet {
- if contractRepeat_C(v, info) {
- return false, reason
- } else {
- reason = reason + "---合同类"
- return true, reason
- }
- } else {
- return false, reason
- }
- } else {
- //招标结果
- if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
- if tenderRepeat_C(v, info) {
- return false, reason
- } else {
- reason = reason + "---类别空-招标类"
- return true, reason
- }
- } else {
- return false, reason
- }
- }
- return false, reason
- }
- // 判重方法2
- func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
- isMeet := false
- isAgency := false
- //招标类-代理机构不同-广泛前后缀比较
- if v.agency != info.agency && v.agency != "" && info.agency != "" {
- //新增一层判断
- if strings.Contains(v.agency, info.agency) || strings.Contains(info.agency, v.agency) {
- isAgency = true
- } else {
- return false, reason
- }
- }
- if (v.agency == info.agency && v.agency != "" && info.agency != "") || isAgency {
- if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
- info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
- info.subtype == "变更" || info.subtype == "其他" {
- //招标结果
- if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
- if tenderRepeat_C(v, info) { //有不同
- return false, reason
- } else {
- reason = reason + "---招标类"
- return true, reason
- }
- } else {
- return false, reason
- }
- } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
- //中标结果
- if isMeet, reason = winningRepeat_B(v, info, reason); isMeet {
- if winningRepeat_C(v, info) { //有不同
- return false, reason
- } else {
- reason = reason + "---中标类"
- return true, reason
- }
- } else {
- return false, reason
- }
- } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
- //合同
- if isMeet, reason = contractRepeat_B(v, info, reason); isMeet {
- if contractRepeat_C(v, info) { //有不同
- return false, reason
- } else {
- reason = reason + "---合同类"
- return true, reason
- }
- } else {
- return false, reason
- }
- } else {
- //招标结果
- if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
- if tenderRepeat_C(v, info) { //有不同
- return false, reason
- } else {
- reason = reason + "---类别空-招标类"
- return true, reason
- }
- } else {
- return false, reason
- }
- }
- }
- //机构最2少一个为空
- if v.agency == "" || info.agency == "" {
- var repeat = false
- if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
- reason = reason + "---机构最少一个空"
- return true, reason
- } else {
- return false, reason
- }
- }
- return false, reason
- }
- // 招标_A
- func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
- var ss string
- p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false
- if v.projectname != "" && v.projectname == info.projectname {
- ss = ss + "p1-名称-"
- p1 = true
- }
- if v.buyer != "" && v.buyer == info.buyer {
- ss = ss + "p2-单位-"
- p2 = true
- }
- if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
- (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
- ss = ss + "p3-编号组-"
- p3 = true
- }
- if v.budget != 0 && v.budget == info.budget {
- ss = ss + "p4-预算-"
- p4 = true
- }
- if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
- ss = ss + "p9-开标时间相同-"
- p9 = true
- }
- if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
- ss = ss + "p10-开标地点-"
- p10 = true
- }
- if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 {
- if strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title) {
- ss = ss + "p11-标题-"
- p11 = true
- } else {
- if v.c_title != "" && info.c_title != "" &&
- (strings.Contains(v.c_title, info.c_title) || strings.Contains(info.c_title, v.c_title)) {
- ss = ss + "p11-标题-"
- p11 = true
- }
- }
- }
- if info.subtype != "" && (p1 && p3 && p11) {
- reason = reason + "满足招标A,3要素组合-" + ss + ","
- return true, reason
- }
- if (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) ||
- (p1 && p2 && p10) || (p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) || (p1 && p3 && p4) ||
- (p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) ||
- (p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) ||
- (p2 && p4 && p9) || (p2 && p4 && p10) || (p2 && p4 && p11) ||
- (p3 && p4 && p9) || (p3 && p4 && p10) || (p3 && p4 && p11) ||
- (p4 && p9 && p10) || (p4 && p9 && p11) || (p9 && p10 && p11) {
- reason = reason + "满足招标A,3要素组合-" + ss + ","
- return true, reason
- }
- return false, reason
- }
- // 招标_B
- func tenderRepeat_B(v *Info, info *Info, reason string) (bool, string) {
- m, n := 0, 0
- if v.projectname != "" && v.projectname == info.projectname {
- m++
- n++
- }
- if v.buyer != "" && v.buyer == info.buyer {
- m++
- }
- if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
- (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
- m++
- }
- if v.budget != 0 && v.budget == info.budget {
- m++
- }
- if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
- m++
- }
- //if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
- // m++
- //}
- if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
- (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
- m++
- n++
- }
- if m >= 2 {
- if n == 2 && m == 2 {
- return false, reason
- } else {
- reason = reason + "满足招标B,六选二,"
- return true, reason
- }
- }
- return false, reason
- }
- // 招标_C
- func tenderRepeat_C(v *Info, info *Info) bool {
- if v.budget != 0 && info.budget != 0 && v.budget != info.budget {
- return true
- }
- if v.bidopentime != 0 && info.bidopentime != 0 && isBidopentimeInterval(info.bidopentime, v.bidopentime) {
- return true
- }
- return false
- }
- // 中标_A
- func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
- var ss string
- p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false
- if v.projectname != "" && v.projectname == info.projectname {
- ss = ss + "p1-项目名称-"
- p1 = true
- }
- if v.buyer != "" && v.buyer == info.buyer {
- ss = ss + "p2-单位-"
- p2 = true
- }
- if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
- (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
- ss = ss + "p3-编号组--"
- p3 = true
- }
- if v.bidamount != 0 && !isBidWinningAmount(v.bidamount, info.bidamount) {
- ss = ss + "p5-中标金-"
- p5 = true
- }
- if v.winner != "" && v.winner == info.winner {
- ss = ss + "p6-中标人-"
- p6 = true
- }
- if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
- (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
- ss = ss + "p11-标题-"
- p11 = true
- }
- if (p1 && p2 && p3) || (p1 && p2 && p5) || (p1 && p2 && p6) ||
- (p1 && p2 && p11) || (p1 && p3 && p11) ||
- (p1 && p3 && p5) || (p1 && p3 && p6) || (p1 && p5 && p6) ||
- (p2 && p3 && p5) || (p2 && p3 && p6) || (p2 && p3 && p11) ||
- (p2 && p5 && p6) || (p2 && p5 && p11) || (p2 && p6 && p11) ||
- (p3 && p5 && p6) || (p3 && p5 && p11) || (p3 && p6 && p11) ||
- (p5 && p6 && p11) {
- reason = reason + "满足中标A,3要素组合-" + ss + ","
- return true, reason
- }
- return false, reason
- }
- // 中标_B
- func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) {
- m, n := 0, 0
- if v.projectname != "" && v.projectname == info.projectname {
- m++
- n++
- }
- if v.buyer != "" && v.buyer == info.buyer {
- m++
- }
- if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
- (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
- m++
- }
- if v.bidamount != 0 && !isBidWinningAmount(v.bidamount, info.bidamount) {
- m++
- }
- if v.winner != "" && v.winner == info.winner {
- m++
- }
- if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
- (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
- m++
- n++
- }
- if m >= 2 {
- if n == 2 && m == 2 {
- return false, reason
- } else {
- reason = reason + "满足中标B.六选二,"
- return true, reason
- }
- }
- return false, reason
- }
- // 中标_C
- func winningRepeat_C(v *Info, info *Info) bool {
- if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount, info.bidamount) {
- //避免抽错金额-
- if ((v.projectcode != "" && info.projectcode != "" && v.projectcode == info.projectcode) ||
- (v.contractnumber != "" && info.contractnumber != "" && v.contractnumber == info.contractnumber)) &&
- (v.winner != "" && info.winner != "" && v.winner == info.winner) {
- return false
- }
- return true
- }
- if v.winner != "" && info.winner != "" && v.winner != info.winner {
- return true
- }
- return false
- }
- // 合同_A
- func contractRepeat_A(v *Info, info *Info, reason string) (bool, string) {
- isMeet_1 := false
- if isMeet_1, reason = tenderRepeat_A(v, info, reason); isMeet_1 {
- return true, reason
- }
- isMeet_2 := false
- if isMeet_2, reason = winningRepeat_A(v, info, reason); isMeet_2 {
- return true, reason
- }
- return false, reason
- }
- // 合同_B
- func contractRepeat_B(v *Info, info *Info, reason string) (bool, string) {
- isMeet_1 := false
- if isMeet_1, reason = tenderRepeat_B(v, info, reason); isMeet_1 {
- return true, reason
- }
- isMeet_2 := false
- if isMeet_2, reason = winningRepeat_B(v, info, reason); isMeet_2 {
- return true, reason
- }
- return false, reason
- }
- // 合同_C
- func contractRepeat_C(v *Info, info *Info) bool {
- if tenderRepeat_C(v, info) {
- return true
- }
- if winningRepeat_C(v, info) {
- return true
- }
- //合同类 - 新增编号
- if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
- return true
- }
- if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
- return true
- }
- return false
- }
// isTheSimilarName reports whether either name contains the other as a
// substring. An empty string is contained in every string, so the result is
// true whenever at least one argument is empty.
func isTheSimilarName(name1 string, name2 string) bool {
	return strings.Contains(name1, name2) || strings.Contains(name2, name1)
}
- // 快速低质量数据判重
- func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
- //区间间隔24小时
- if !isTimeIntervalPeriod(v.publishtime, info.publishtime) {
- return false, reason
- }
- //首先判定是否为低质量数据 info目标数据
- if info.title != "" && (info.agency == "" || v.agency == "") &&
- (info.title == v.title) &&
- (info.projectcode == "" || info.projectcode == v.projectcode) &&
- info.contractnumber == "" && info.buyer == "" {
- isValue := 0 //五要素判断
- if info.projectname != "" { //项目名称
- isValue++
- }
- if info.budget != 0 { //预算
- isValue++
- }
- if info.winner != "" { //中标单位
- isValue++
- }
- if info.bidamount != 0 { //中标金额
- isValue++
- }
- if isValue == 0 {
- reason = reason + "---低质量-要素均为空-标题满足"
- return true, reason
- } else if isValue == 1 {
- isMeet := false
- if isMeet, reason = judgeLowQualityData(v, info, reason); isMeet {
- reason = reason + "---低质量-有且一个要素组合"
- return true, reason
- }
- } else if isValue == 2 {
- if info.subtype == "采购意向" { //特殊
- if info.projectname != "" && info.projectname == v.projectname &&
- info.budget != 0 && info.budget == v.budget &&
- info.city != "" && info.city == v.city {
- reason = reason + "---采购意向~同城~预算~名称均一致"
- return true, reason
- }
- }
- } else {
- }
- }
- return false, reason
- }
- // 类别细节原因记录
- func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
- if info.projectname != "" && isTheSimilarName(info.projectname, v.projectname) {
- reason = reason + "---项目名称"
- return true, reason
- }
- if info.budget != 0 && info.budget == v.budget { //预算
- reason = reason + "---预算"
- return true, reason
- }
- if v.winner != "" && info.winner == v.winner { //中标单位
- reason = reason + "---中标单位"
- return true, reason
- }
- if v.bidamount != 0 && info.bidamount == v.bidamount { //中标金额
- reason = reason + "---中标金额"
- return true, reason
- }
- return false, reason
- }
|