package main

import (
	"fmt"
	"log"
	qutil "qfw/util"
	"qfw/util/mongodb"
	"regexp"
	"strings"
	"sync"
	"time"
)

// Info is the in-memory projection of one announcement record that the
// de-duplication pool compares against. It is filled by NewInfo.
type Info struct {
	id               string                 // record id
	title            string                 // title
	area             string                 // province
	city             string                 // city
	subtype          string                 // announcement type
	buyer            string                 // purchasing entity
	agency           string                 // agency
	winner           string                 // winning bidder
	budget           float64                // budget amount
	bidamount        float64                // winning bid amount
	projectname      string                 // project name
	projectcode      string                 // project code
	contractnumber   string                 // contract number
	publishtime      int64                  // publish time
	comeintime       int64                  // ingestion time
	bidopentime      int64                  // bid opening time
	bidopenaddress   string                 // bid opening address
	site             string                 // site
	href             string                 // URL of the announcement body
	repeatid         string                 // id of the record this one duplicates
	titleSpecialWord bool                   // title matches one of FilterRegTitle_0/1/2
	specialWord      bool                   // title matches FilterRegTitle (second-pass special word)
	mergemap         map[string]interface{} // merge record
	is_site          bool                   // set when area/city were filled in from SiteMap
}

// datelimit is the width of the comparison window in seconds. It defaults to
// five days and is overwritten by every pool constructor in this file.
var datelimit = float64(432000)

// sitelock guards reads of SiteMap.
var sitelock sync.Mutex

// extraSpaceReg matches runs of two or more whitespace characters.
var extraSpaceReg = regexp.MustCompile(`\s{2,}`)

// datamap is the pool used for regular de-duplication. Buckets in data are
// keyed "<yyyyMMdd>_<subtype>_<area>".
type datamap struct {
	lock     sync.Mutex         // guards the fields below
	days     int                // how many days of data to retain
	data     map[string][]*Info // bucket key -> records
	keymap   []string           // retained days, refreshed by update
	areakeys []string           // every area seen so far
	keys     map[string]bool    // every day (yyyyMMdd) present in data
}

// historymap is the pool used when re-checking historical (updated) data.
// It has the same layout as datamap.
type historymap struct {
	lock     sync.Mutex         // guards the fields below
	days     int                // how many days of data to retain
	data     map[string][]*Info // bucket key -> records
	keymap   []string           // retained days
	areakeys []string           // every area seen so far
	keys     map[string]bool    // every day (yyyyMMdd) present in data
}

// poolKey builds the bucket key "<day>_<subtype>_<area>".
func poolKey(day, subtype, area string) string {
	return fmt.Sprintf("%s_%s_%s", day, subtype, area)
}

// appendAreaKey returns areas with area appended unless it is already listed.
func appendAreaKey(areas []string, area string) []string {
	for _, a := range areas {
		if a == area {
			return areas
		}
	}
	return append(areas, area)
}

// storeInfo appends info to its bucket for day dkey, marks the day as present
// and returns areakeys extended with info.area when that area is new.
func storeInfo(data map[string][]*Info, days map[string]bool, areakeys []string, dkey string, info *Info) []string {
	k := poolKey(dkey, info.subtype, info.area)
	data[k] = append(data[k], info)
	days[dkey] = true
	return appendAreaKey(areakeys, info.area)
}

// TimedTaskDatamap rebuilds the pool for the timed task. It sets datelimit to
// days*86400 seconds and loads, newest first, the records of the extract_back
// collection whose publishtime is below lasttime, stopping at the first record
// that falls outside the window. Records with repeat == 1, repeat == -1 or
// dataging == 1 are skipped. A negative lasttime yields an empty pool.
func TimedTaskDatamap(days int, lasttime int64) *datamap {
	log.Println("数据池开始重新构建")
	datelimit = qutil.Float64All(days * 86400)
	dm := &datamap{
		days:     days,
		data:     map[string][]*Info{},
		keymap:   []string{},
		areakeys: []string{},
		keys:     map[string]bool{},
	}
	if lasttime < 0 {
		log.Println("数据池空数据")
		return dm
	}
	start := int(time.Now().Unix())
	sess := mgo.GetMgoConn()
	defer mgo.DestoryMongoConn(sess)
	query := map[string]interface{}{"publishtime": map[string]interface{}{
		"$lt": lasttime,
	}}
	log.Println("query", query)
	// NOTE(review): the iterator is never closed and its error is never
	// inspected — confirm whether the driver requires an explicit Close.
	it := sess.DB(mgo.DbName).C(extract_back).Find(query).Sort("-publishtime").Iter()
	n, continuSum := 0, 0
	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
		skip := qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1 ||
			qutil.IntAll(tmp["dataging"]) == 1
		if !skip {
			pt := tmp["publishtime"]
			ptTime := qutil.Int64All(pt)
			if qutil.Float64All(lasttime-ptTime) >= datelimit {
				// Results are sorted newest first, so everything after
				// this record is outside the window as well.
				break
			}
			continuSum++
			info := NewInfo(tmp)
			dkey := qutil.FormatDateWithObj(&pt, qutil.Date_yyyyMMdd)
			dm.areakeys = storeInfo(dm.data, dm.keys, dm.areakeys, dkey, info)
		}
		if n%50000 == 0 {
			log.Println("current 数据池:", n, continuSum)
		}
		tmp = make(map[string]interface{})
	}
	log.Printf("数据池构建完成::%d秒,%d个\n", int(time.Now().Unix())-start, n)
	return dm
}

// NewDatamap builds the regular pool. It sets datelimit to days*86400 seconds
// and loads, in descending _id order, the records of the extract collection
// with _id <= lastid. The time of the first usable record anchors the window;
// loading stops at the first record outside it. The compared time is
// comeintime, or publishtime when Is_Sort is set. An empty lastid yields an
// empty pool.
func NewDatamap(days int, lastid string) *datamap {
	datelimit = qutil.Float64All(days * 86400)
	dm := &datamap{
		days:     days,
		data:     map[string][]*Info{},
		keymap:   []string{},
		areakeys: []string{},
		keys:     map[string]bool{},
	}
	if lastid == "" {
		return dm
	}
	// Initial data load.
	sess := mgo.GetMgoConn()
	defer mgo.DestoryMongoConn(sess)
	query := map[string]interface{}{"_id": map[string]interface{}{
		"$lte": StringTOBsonId(lastid),
	}}
	log.Println("query", query)
	it := sess.DB(mgo.DbName).C(extract).Find(query).Sort("-_id").Iter()
	now1 := int64(0)
	n, continuSum := 0, 0
	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
		if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1 {
			continuSum++
		} else {
			pt := tmp["comeintime"]
			if Is_Sort {
				pt = tmp["publishtime"]
			}
			ptTime := qutil.Int64All(pt)
			if ptTime <= 0 {
				continue
			}
			if now1 == 0 {
				now1 = ptTime
			}
			if qutil.Float64All(now1-ptTime) >= datelimit {
				break
			}
			info := NewInfo(tmp)
			dkey := qutil.FormatDateWithObj(&pt, qutil.Date_yyyyMMdd)
			dm.areakeys = storeInfo(dm.data, dm.keys, dm.areakeys, dkey, info)
		}
		if n%5000 == 0 {
			log.Println("current n:", n, continuSum)
		}
		tmp = make(map[string]interface{})
	}
	log.Println("load data:", n)
	return dm
}

// NewHistorymap builds a fresh history pool. It sets datelimit to five days
// and loads the records within five days before startTime (walking backwards
// from startid) and within five days after lastTime (walking forwards from
// lastid). If either id is empty the pool is returned empty.
func NewHistorymap(startid string, lastid string, startTime int64, lastTime int64) *historymap {
	datelimit = qutil.Float64All(5 * 86400)
	hm := &historymap{
		days:     5,
		data:     map[string][]*Info{},
		keymap:   []string{},
		areakeys: []string{},
		keys:     map[string]bool{},
	}
	if lastid == "" || startid == "" {
		return hm
	}
	// The five days before startid.
	n := hm.loadRange(`{"_id":{"$lte":"`+startid+`"}}`, "-_id", func(pt int64) bool {
		return qutil.Float64All(startTime-pt) <= datelimit
	})
	log.Println("load history 前:", n)
	// The five days after lastid.
	m := hm.loadRange(`{"_id":{"$gte":"`+lastid+`"}}`, "_id", func(pt int64) bool {
		return qutil.Float64All(pt-lastTime) <= datelimit
	})
	log.Println("load history 后:", m)
	return hm
}

// loadRange streams the extract collection with the given filter and sort
// order into h and returns how many records were stored. Records flagged
// repeat == 1 / -1 or without a positive timestamp are skipped; the scan stops
// at the first record for which inWindow reports false. The session is
// released when the scan ends.
func (h *historymap) loadRange(filter string, sortField string, inWindow func(pt int64) bool) int {
	sess := mgo.GetMgoConn()
	defer mgo.DestoryMongoConn(sess)
	it := sess.DB(mgo.DbName).C(extract).Find(mongodb.ObjToMQ(filter, true)).Sort(sortField).Iter()
	loaded := 0
	for tmp := make(map[string]interface{}); it.Next(&tmp); {
		if r := qutil.IntAll(tmp["repeat"]); r == 1 || r == -1 {
			continue
		}
		pt := tmp["comeintime"]
		if Is_Sort {
			pt = tmp["publishtime"]
		}
		ptTime := qutil.Int64All(pt)
		if ptTime <= 0 {
			continue
		}
		if !inWindow(ptTime) {
			break
		}
		loaded++
		info := NewInfo(tmp)
		dkey := qutil.FormatDateWithObj(&pt, qutil.Date_yyyyMMdd)
		h.areakeys = storeInfo(h.data, h.keys, h.areakeys, dkey, info)
		tmp = make(map[string]interface{})
	}
	return loaded
}

// NewInfo converts a raw record into an Info. An area of "A" is stored as
// "全国" (nationwide). mergemap is never left nil.
func NewInfo(tmp map[string]interface{}) *Info {
	subtype := qutil.ObjToString(tmp["subtype"])
	area := qutil.ObjToString(tmp["area"])
	if area == "A" {
		area = "全国"
	}
	info := &Info{}
	info.id = BsonTOStringId(tmp["_id"])
	info.title = qutil.ObjToString(tmp["title"])
	info.area = area
	info.subtype = subtype
	info.buyer = qutil.ObjToString(tmp["buyer"])
	info.projectname = qutil.ObjToString(tmp["projectname"])
	info.contractnumber = qutil.ObjToString(tmp["contractnumber"])
	info.projectcode = qutil.ObjToString(tmp["projectcode"])
	info.city = qutil.ObjToString(tmp["city"])
	info.agency = qutil.ObjToString(tmp["agency"])
	info.winner = qutil.ObjToString(tmp["winner"])
	info.budget = qutil.Float64All(tmp["budget"])
	info.bidamount = qutil.Float64All(tmp["bidamount"])
	info.publishtime = qutil.Int64All(tmp["publishtime"])
	info.comeintime = qutil.Int64All(tmp["comeintime"])
	info.bidopentime = qutil.Int64All(tmp["bidopentime"])
	info.bidopenaddress = qutil.ObjToString(tmp["bidopenaddress"])
	info.site = qutil.ObjToString(tmp["site"])
	info.href = qutil.ObjToString(tmp["href"])
	info.repeatid = qutil.ObjToString(tmp["repeatid"])
	info.specialWord = FilterRegTitle.MatchString(info.title)
	info.titleSpecialWord = FilterRegTitle_0.MatchString(info.title) ||
		FilterRegTitle_1.MatchString(info.title) ||
		FilterRegTitle_2.MatchString(info.title)
	// Guard the pointer before dereferencing: presumably ObjToMap returns nil
	// when merge_map is absent or not a map — TODO confirm against qutil.
	if mm := qutil.ObjToMap(tmp["merge_map"]); mm != nil {
		info.mergemap = *mm
	}
	if info.mergemap == nil {
		info.mergemap = make(map[string]interface{}, 0)
	}
	info.is_site = false
	return info
}

// candidateKeys lists the bucket keys info has to be compared against: for
// every known day, either every known area (when info is nationwide) or
// info's own area, plus the nationwide bucket.
func candidateKeys(days map[string]bool, areas []string, info *Info) []string {
	keys := []string{}
	for day := range days {
		if info.area == "全国" {
			// Nationwide record: match every province.
			for _, area := range areas {
				keys = append(keys, poolKey(day, info.subtype, area))
			}
		} else {
			// Match the record's own province.
			keys = append(keys, poolKey(day, info.subtype, info.area))
		}
		keys = append(keys, poolKey(day, info.subtype, "全国"))
	}
	return keys
}

// applySiteRegion overwrites info.area and info.city with the values SiteMap
// holds for info.site when info is nationwide (and the site has an area) or
// info has no city (and the site has one). With markSite set it also raises
// info.is_site when it does so.
func applySiteRegion(info *Info, markSite bool) {
	if info.site == "" {
		return
	}
	sitelock.Lock()
	dict := SiteMap[info.site]
	sitelock.Unlock()
	if dict == nil {
		return
	}
	if (info.area == "全国" && dict["area"] != "") || (info.city == "" && dict["city"] != "") {
		if markSite {
			info.is_site = true
		}
		info.area = qutil.ObjToString(dict["area"])
		info.city = qutil.ObjToString(dict["city"])
	}
}

// comparePair decides whether info duplicates the pooled record v (both of
// the same subtype). dup reports a duplicate, stop reports that the whole
// search should end immediately (same site and same href), and reason is the
// accumulated explanation.
func comparePair(v *Info, info *Info) (dup bool, stop bool, reason string) {
	// Precondition 1 - site related.
	if info.site != "" && info.site == v.site {
		if info.href != "" && info.href == v.href {
			return true, true, "href相同"
		}
		if info.href != "" && info.href != v.href {
			reason = "href不同-"
		}
	}
	// Precondition 2 - title related: only the incoming record carries a
	// special word and the titles differ, so they cannot be duplicates.
	if ((info.titleSpecialWord && !v.titleSpecialWord) || (info.specialWord && !v.specialWord)) &&
		info.title != v.title && v.title != "" && info.title != "" {
		return false, false, reason
	}
	// Precondition 3 - title related: both carry a special word.
	if ((info.titleSpecialWord && v.titleSpecialWord) || (info.specialWord && v.specialWord)) &&
		len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && v.title != "" && info.title != "" {
		if !(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
			return false, false, reason // no containment relation
		}
		reason = reason + "标题关键词且包含关系"
		// Second-level amount check.
		if !againRepeat(v, info) {
			return true, false, reason
		}
	}
	// Fast check for low-information records.
	if LowHeavy {
		repeat := false
		if repeat, reason = fastLowQualityHeavy(v, info, reason); repeat {
			return true, false, reason
		}
	}
	repeat := false
	if v.agency != "" && info.agency != "" && v.agency == info.agency {
		// Same, non-empty agency.
		reason = reason + "同机构-"
		repeat, reason = quickHeavyMethodTwo(v, info, reason)
	} else {
		reason = reason + "非同机构-"
		if info.city != "" && info.city == v.city {
			reason = reason + "同城-"
			repeat, reason = quickHeavyMethodTwo(v, info, reason)
		} else {
			reason = reason + "不同城-"
			repeat, reason = quickHeavyMethodOne(v, info, reason)
		}
	}
	return repeat, false, reason
}

// check is the de-duplication entry point of the regular pool. It compares
// info with every pooled record of the same subtype in the candidate buckets
// and returns whether it is a duplicate, the record it duplicates and the
// reason. Meeting a pooled record with info's own id returns (false, v, "")
// immediately. A record that is not a duplicate is added to the pool. info's
// area/city may be rewritten from SiteMap as a side effect.
func (d *datamap) check(info *Info) (b bool, source *Info, reasons string) {
	d.lock.Lock()
	keys := candidateKeys(d.keys, d.areakeys, info)
	d.lock.Unlock()
L:
	for _, k := range keys {
		d.lock.Lock()
		data := d.data[k]
		d.lock.Unlock()
		// Compare against records of the same type in the same province or
		// the nationwide bucket.
		for _, v := range data {
			if v.id == info.id { // the very same record
				return false, v, ""
			}
			if info.subtype != v.subtype {
				continue
			}
			applySiteRegion(info, true)
			dup, stop, reason := comparePair(v, info)
			if !dup {
				continue
			}
			b, source, reasons = true, v, reason
			if stop {
				break L
			}
			// NOTE(review): only the href match ends the whole search; other
			// matches leave just this bucket, so a later bucket can replace
			// source or hit the same-id return — confirm this is intended.
			break
		}
	}
	// Add to the pool.
	if !b {
		ct := info.comeintime
		if Is_Sort || TimingTask {
			ct = info.publishtime
		}
		dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
		k := poolKey(dkey, info.subtype, info.area)
		d.lock.Lock()
		bucket := d.data[k]
		d.data[k] = append(bucket, info)
		if bucket == nil && !d.keys[dkey] {
			// First record of a new day: register it and drop expired days.
			d.keys[dkey] = true
			d.update(ct)
		}
		d.areakeys = appendAreaKey(d.areakeys, info.area)
		d.lock.Unlock()
	}
	return
}

// checkHistory is the de-duplication entry point of the history pool. It
// performs the same comparison as datamap.check (without touching is_site)
// and then adjusts the verdict: a duplicate whose source is already recorded
// in info.repeatid is reported as unchanged (b == false). Records that end up
// with b == false are added to the pool.
func (h *historymap) checkHistory(info *Info) (b bool, source *Info, reasons string) {
	h.lock.Lock()
	keys := candidateKeys(h.keys, h.areakeys, info)
	h.lock.Unlock()
L:
	for _, k := range keys {
		h.lock.Lock()
		data := h.data[k]
		h.lock.Unlock()
		for _, v := range data {
			if v.id == info.id { // the very same record
				return false, v, ""
			}
			if info.subtype != v.subtype {
				continue
			}
			applySiteRegion(info, false)
			dup, stop, reason := comparePair(v, info)
			if !dup {
				continue
			}
			b, source, reasons = true, v, reason
			if stop {
				break L
			}
			// NOTE(review): see datamap.check — non-href matches only leave
			// the current bucket.
			break
		}
	}
	if b {
		if info.repeatid == source.id {
			b = false // duplicate, unchanged: nothing to do
		}
	} else {
		// NOTE(review): source is only assigned together with b = true, so
		// this branch looks unreachable — confirm before relying on it.
		if source != nil {
			if source.repeatid != "" { // not judged duplicate but changed: record it
				b = true
				reasons = "未判重记录"
			}
		}
	}
	// Add to the pool, under the same lock that guards the reads above.
	if !b {
		ct := info.comeintime
		if Is_Sort {
			ct = info.publishtime
		}
		dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
		k := poolKey(dkey, info.subtype, info.area)
		h.lock.Lock()
		bucket := h.data[k]
		h.data[k] = append(bucket, info)
		if bucket == nil && !h.keys[dkey] {
			h.keys[dkey] = true
		}
		h.areakeys = appendAreaKey(h.areakeys, info.area)
		h.lock.Unlock()
	}
	return
}

// replaceSourceData swaps the pooled record whose id is replaceId for
// replaceData inside replaceData's bucket. When that bucket does not exist it
// is created holding replaceData.
func (d *datamap) replaceSourceData(replaceData *Info, replaceId string) {
	ct := replaceData.comeintime
	if Is_Sort || TimingTask {
		ct = replaceData.publishtime
	}
	dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
	k := poolKey(dkey, replaceData.subtype, replaceData.area)
	d.lock.Lock()
	defer d.lock.Unlock()
	data := d.data[k]
	if data == nil {
		d.data[k] = []*Info{replaceData}
		if !d.keys[dkey] {
			d.keys[dkey] = true
		}
		return
	}
	// Replace the first record with the requested id.
	for i, v := range data {
		if v.id == replaceId {
			data[i] = replaceData
			break
		}
	}
	d.data[k] = data
}

// replaceSourceData swaps the pooled record whose id is replaceId for
// replaceData inside replaceData's bucket. When that bucket does not exist it
// is created holding replaceData.
func (h *historymap) replaceSourceData(replaceData *Info, replaceId string) {
	ct := replaceData.comeintime
	if Is_Sort {
		ct = replaceData.publishtime
	}
	dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
	k := poolKey(dkey, replaceData.subtype, replaceData.area)
	h.lock.Lock()
	defer h.lock.Unlock()
	data := h.data[k]
	if data == nil {
		h.data[k] = []*Info{replaceData}
		if !h.keys[dkey] {
			h.keys[dkey] = true
		}
		return
	}
	// Replace the first record with the requested id.
	for i, v := range data {
		if v.id == replaceId {
			data[i] = replaceData
			break
		}
	}
	h.data[k] = data
}

// update drops every bucket and day that is not among the d.days days ending
// at t. It does not lock; check calls it while holding d.lock.
func (d *datamap) update(t int64) {
	d.keymap = d.GetLatelyFiveDay(t)
	keep := make(map[string]bool, len(d.keymap))
	for _, day := range d.keymap {
		keep[day] = true
	}
	for k := range d.data {
		// Bucket keys start with the 8-character day; a shorter key cannot
		// belong to a retained day (and slicing it would panic).
		if len(k) < 8 || !keep[k[:8]] {
			delete(d.data, k)
		}
	}
	for k := range d.keys {
		if !keep[k] {
			delete(d.keys, k)
		}
	}
}

// GetLatelyFiveDay returns d.days consecutive days, formatted with
// qutil.Date_yyyyMMdd, starting at t and going backwards.
func (d *datamap) GetLatelyFiveDay(t int64) []string {
	array := make([]string, d.days)
	now := time.Unix(t, 0)
	for i := 0; i < d.days; i++ {
		array[i] = now.Format(qutil.Date_yyyyMMdd)
		now = now.AddDate(0, 0, -1)
	}
	return array
}

/*
 **************************
 *** De-duplication rules ***
 **************************
 */

// isTenderSubtype reports whether subtype belongs to the tender group.
func isTenderSubtype(subtype string) bool {
	switch subtype {
	case "招标", "邀标", "询价", "竞谈", "单一", "竞价", "变更", "其他":
		return true
	}
	return false
}

// isWinningSubtype reports whether subtype belongs to the award group.
func isWinningSubtype(subtype string) bool {
	switch subtype {
	case "中标", "成交", "废标", "流标":
		return true
	}
	return false
}

// isContractSubtype reports whether subtype belongs to the contract group.
func isContractSubtype(subtype string) bool {
	switch subtype {
	case "合同", "验收", "违规":
		return true
	}
	return false
}

// sameProjectName reports a non-empty, equal project name.
func sameProjectName(v, info *Info) bool {
	return v.projectname != "" && v.projectname == info.projectname
}

// sameBuyer reports a non-empty, equal purchasing entity.
func sameBuyer(v, info *Info) bool {
	return v.buyer != "" && v.buyer == info.buyer
}

// sameCodeGroup reports an equal project code or contract number of at least
// five bytes.
func sameCodeGroup(v, info *Info) bool {
	return (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5)
}

// titlesOverlap reports that both titles are longer than ten characters and
// one contains the other.
func titlesOverlap(v, info *Info) bool {
	return len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
		(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title))
}

// sameBidAmount reports that v has a bid amount and the two amounts are equal
// or differ by exactly a factor of 10000.
func sameBidAmount(v, info *Info) bool {
	return v.bidamount != 0 && !isBidWinningAmount(v.bidamount, info.bidamount)
}

// sameWinner reports that v has a winner and the two winners are equal after
// whitespace normalization.
func sameWinner(v, info *Info) bool {
	return v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner)
}

// fastLowQualityHeavy handles low-information records: info has the same
// agency and the same non-empty title as v and carries no project name,
// project code, contract number or buyer. With none of the five value fields
// set the pair is a duplicate; with exactly one set, judgeLowQualityData
// decides.
func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
	lowQuality := info.agency == v.agency && info.title != "" && info.title == v.title &&
		info.projectname == "" && info.projectcode == "" && info.contractnumber == "" && info.buyer == ""
	if !lowQuality {
		return false, reason
	}
	isValue := 0 // how many of the five value fields are present
	if info.budget != 0 { // budget
		isValue++
	}
	if info.bidopentime != 0 { // bid opening time
		isValue++
	}
	if info.bidopenaddress != "" { // bid opening address
		isValue++
	}
	if info.winner != "" { // winning bidder
		isValue++
	}
	if info.bidamount != 0 { // winning bid amount
		isValue++
	}
	switch isValue {
	case 0:
		reason = reason + "---要素均为空,标题包含关系"
		return true, reason
	case 1:
		isMeet := false
		if isMeet, reason = judgeLowQualityData(v, info, reason); isMeet {
			reason = reason + "---有且一个要素组合"
			return true, reason
		}
	}
	return false, reason
}

// lowQualityTenderFields reports whether budget, bid opening time or bid
// opening address is set on info and equal on v, appending prefix plus the
// matched field name to reason.
func lowQualityTenderFields(v *Info, info *Info, reason string, prefix string) (bool, string) {
	if info.budget != 0 && info.budget == v.budget {
		return true, reason + prefix + "预算"
	}
	if info.bidopentime != 0 && info.bidopentime == v.bidopentime {
		return true, reason + prefix + "开标时间"
	}
	if info.bidopenaddress != "" && info.bidopenaddress == v.bidopenaddress {
		return true, reason + prefix + "开标地点"
	}
	return false, reason
}

// lowQualityWinningFields reports whether winner or bid amount is set on v
// and equal on info, appending prefix plus the matched field name to reason.
func lowQualityWinningFields(v *Info, info *Info, reason string, prefix string) (bool, string) {
	if v.winner != "" && info.winner == v.winner {
		return true, reason + prefix + "中标单位"
	}
	if v.bidamount != 0 && info.bidamount == v.bidamount {
		return true, reason + prefix + "中标金额"
	}
	return false, reason
}

// judgeLowQualityData picks, by info's subtype group, which value fields have
// to agree for two low-information records to count as duplicates, and
// records the matched field in reason.
func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
	switch {
	case isTenderSubtype(info.subtype):
		return lowQualityTenderFields(v, info, reason, "---招标类:")
	case isWinningSubtype(info.subtype):
		return lowQualityWinningFields(v, info, reason, "---中标类:")
	case isContractSubtype(info.subtype):
		if ok, r := lowQualityTenderFields(v, info, reason, "---合同类:"); ok {
			return true, r
		}
		return lowQualityWinningFields(v, info, reason, "---合同类:")
	default:
		// Unknown subtype: treated like the tender group.
		return lowQualityTenderFields(v, info, reason, "---类别空-招标类:")
	}
}

// judgeRepeat runs meet and, when it succeeds, vetoes the match if differ
// finds conflicting fields; otherwise label is appended to the reason.
func judgeRepeat(v *Info, info *Info, reason string,
	meet func(v *Info, info *Info, reason string) (bool, string),
	differ func(v *Info, info *Info) bool, label string) (bool, string) {
	ok, reason := meet(v, info, reason)
	if !ok || differ(v, info) {
		return false, reason
	}
	return true, reason + label
}

// quickHeavyMethodOne is rule set 1: the "A" (three matching elements) rule of
// info's subtype group, vetoed by the group's "C" (conflicting field) rule.
func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
	switch {
	case isTenderSubtype(info.subtype):
		return judgeRepeat(v, info, reason, tenderRepeat_A, tenderRepeat_C, "---招标类")
	case isWinningSubtype(info.subtype):
		return judgeRepeat(v, info, reason, winningRepeat_A, winningRepeat_C, "---中标类")
	case isContractSubtype(info.subtype):
		return judgeRepeat(v, info, reason, contractRepeat_A, contractRepeat_C, "---合同类")
	default:
		return judgeRepeat(v, info, reason, tenderRepeat_A, tenderRepeat_C, "---类别空-招标类")
	}
}

// quickHeavyMethodTwo is rule set 2. With the same non-empty agency it applies
// the "B" (two matching elements) rule of info's subtype group, vetoed by the
// "C" rule. With two different non-empty agencies the pair is not a duplicate.
// When at least one agency is empty it falls back to quickHeavyMethodOne.
func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
	if v.agency == "" || info.agency == "" {
		// At least one agency is unknown.
		repeat, r := quickHeavyMethodOne(v, info, reason)
		if repeat {
			return true, r + "---机构最少一个空"
		}
		return false, r
	}
	if v.agency != info.agency {
		return false, reason
	}
	switch {
	case isTenderSubtype(info.subtype):
		return judgeRepeat(v, info, reason, tenderRepeat_B, tenderRepeat_C, "---招标类")
	case isWinningSubtype(info.subtype):
		return judgeRepeat(v, info, reason, winningRepeat_B, winningRepeat_C, "---中标类")
	case isContractSubtype(info.subtype):
		return judgeRepeat(v, info, reason, contractRepeat_B, contractRepeat_C, "---合同类")
	default:
		return judgeRepeat(v, info, reason, tenderRepeat_B, tenderRepeat_C, "---类别空-招标类")
	}
}

// tenderRepeat_A is the tender "A" rule: it succeeds when one of the accepted
// three-element combinations of project name (p1), buyer (p2), code group
// (p3), budget (p4), bid opening time (p9), bid opening address (p10) and
// title containment (p11) matches. The matched elements are appended to
// reason.
func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
	var ss string
	p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false
	if sameProjectName(v, info) {
		ss = ss + "p1(名称)-"
		p1 = true
	}
	if sameBuyer(v, info) {
		ss = ss + "p2(单位)-"
		p2 = true
	}
	if sameCodeGroup(v, info) {
		ss = ss + "p3(编号组)-"
		p3 = true
	}
	if v.budget != 0 && v.budget == info.budget {
		ss = ss + "p4(预算)-"
		p4 = true
	}
	if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
		ss = ss + "p9(开标时间)-"
		p9 = true
	}
	if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
		ss = ss + "p10(开标地点)-"
		p10 = true
	}
	if titlesOverlap(v, info) {
		ss = ss + "p11(标题)-"
		p11 = true
	}
	if (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) ||
		(p1 && p2 && p10) || (p1 && p2 && p11) || (p1 && p3 && p9) ||
		(p1 && p3 && p10) || (p1 && p4 && p9) || (p1 && p4 && p10) ||
		(p2 && p3 && p4) || (p2 && p3 && p9) || (p2 && p3 && p10) ||
		(p2 && p3 && p11) || (p2 && p4 && p9) || (p2 && p4 && p10) ||
		(p2 && p4 && p11) || (p3 && p4 && p9) || (p3 && p4 && p10) ||
		(p3 && p4 && p11) || (p4 && p9 && p10) || (p4 && p9 && p11) ||
		(p9 && p10 && p11) {
		reason = reason + "满足招标A,3要素组合-" + ss + ","
		return true, reason
	}
	return false, reason
}

// tenderRepeat_B is the tender "B" rule: at least two of project name, buyer,
// code group, budget, bid opening time and title containment must match, and
// the pair (project name, title) alone is not enough. The bid opening address
// is deliberately not counted.
func tenderRepeat_B(v *Info, info *Info, reason string) (bool, string) {
	m, n := 0, 0 // m: matched elements; n: matches among project name and title
	if sameProjectName(v, info) {
		m++
		n++
	}
	if sameBuyer(v, info) {
		m++
	}
	if sameCodeGroup(v, info) {
		m++
	}
	if v.budget != 0 && v.budget == info.budget {
		m++
	}
	if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
		m++
	}
	if titlesOverlap(v, info) {
		m++
		n++
	}
	if m < 2 || (n == 2 && m == 2) {
		return false, reason
	}
	reason = reason + "满足招标B,六选二,"
	return true, reason
}

// tenderRepeat_C is the tender veto rule: it reports true when budget, buyer,
// bid opening time or bid opening address is present on both records and
// differs.
func tenderRepeat_C(v *Info, info *Info) bool {
	if v.budget != 0 && info.budget != 0 && v.budget != info.budget {
		return true
	}
	if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
		return true
	}
	if v.bidopentime != 0 && info.bidopentime != 0 && v.bidopentime != info.bidopentime {
		return true
	}
	if v.bidopenaddress != "" && info.bidopenaddress != "" && v.bidopenaddress != info.bidopenaddress {
		return true
	}
	return false
}

// winningRepeat_A is the award "A" rule: it succeeds when one of the accepted
// three-element combinations of project name (p1), buyer (p2), code group
// (p3), bid amount (p5), winner (p6) and title containment (p11) matches. The
// matched elements are appended to reason.
func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
	var ss string
	p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false
	if sameProjectName(v, info) {
		ss = ss + "p1(项目名称)-"
		p1 = true
	}
	if sameBuyer(v, info) {
		ss = ss + "p2(单位)-"
		p2 = true
	}
	if sameCodeGroup(v, info) {
		ss = ss + "p3(编号组)-"
		p3 = true
	}
	if sameBidAmount(v, info) {
		ss = ss + "p5(中标金)-"
		p5 = true
	}
	if sameWinner(v, info) {
		ss = ss + "p6(中标人)-"
		p6 = true
	}
	if titlesOverlap(v, info) {
		ss = ss + "p11(标题)-"
		p11 = true
	}
	if (p1 && p2 && p3) || (p1 && p2 && p5) || (p1 && p2 && p6) ||
		(p1 && p3 && p5) || (p1 && p3 && p6) || (p1 && p5 && p6) ||
		(p2 && p3 && p5) || (p2 && p3 && p6) || (p2 && p3 && p11) ||
		(p2 && p5 && p6) || (p2 && p5 && p11) || (p2 && p6 && p11) ||
		(p3 && p5 && p6) || (p3 && p5 && p11) || (p3 && p6 && p11) ||
		(p5 && p6 && p11) {
		reason = reason + "满足中标A,3要素组合-" + ss + ","
		return true, reason
	}
	return false, reason
}

// winningRepeat_B is the award "B" rule: at least two of project name, buyer,
// code group, bid amount, winner and title containment must match, and the
// pair (project name, title) alone is not enough.
func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) {
	m, n := 0, 0 // m: matched elements; n: matches among project name and title
	if sameProjectName(v, info) {
		m++
		n++
	}
	if sameBuyer(v, info) {
		m++
	}
	if sameCodeGroup(v, info) {
		m++
	}
	if sameBidAmount(v, info) {
		m++
	}
	if sameWinner(v, info) {
		m++
	}
	if titlesOverlap(v, info) {
		m++
		n++
	}
	if m < 2 || (n == 2 && m == 2) {
		return false, reason
	}
	reason = reason + "满足中标B.六选二,"
	return true, reason
}

// winningRepeat_C is the award veto rule: it reports true when bid amount,
// winner or buyer is present on both records and differs (amounts that differ
// by exactly a factor of 10000 count as equal).
func winningRepeat_C(v *Info, info *Info) bool {
	if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount, info.bidamount) {
		return true
	}
	if v.winner != "" && info.winner != "" && deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) {
		return true
	}
	if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
		return true
	}
	return false
}

// contractRepeat_A is the contract "A" rule: the tender "A" rule or, failing
// that, the award "A" rule.
func contractRepeat_A(v *Info, info *Info, reason string) (bool, string) {
	ok := false
	if ok, reason = tenderRepeat_A(v, info, reason); ok {
		return true, reason
	}
	if ok, reason = winningRepeat_A(v, info, reason); ok {
		return true, reason
	}
	return false, reason
}

// contractRepeat_B is the contract "B" rule: the tender "B" rule or, failing
// that, the award "B" rule.
func contractRepeat_B(v *Info, info *Info, reason string) (bool, string) {
	ok := false
	if ok, reason = tenderRepeat_B(v, info, reason); ok {
		return true, reason
	}
	if ok, reason = winningRepeat_B(v, info, reason); ok {
		return true, reason
	}
	return false, reason
}

// contractRepeat_C is the contract veto rule: the tender or the award veto.
func contractRepeat_C(v *Info, info *Info) bool {
	return tenderRepeat_C(v, info) || winningRepeat_C(v, info)
}

// againRepeat is the second-level amount check used after a title match. For
// two records of the same non-empty buyer it reports true when their amounts
// conflict: different non-zero budgets for the tender group; different
// non-zero bid amounts or different non-empty winners for the award and
// contract groups. Any other case reports false.
func againRepeat(v *Info, info *Info) bool {
	if info.buyer == "" || v.buyer != info.buyer {
		return false
	}
	switch {
	case isTenderSubtype(info.subtype):
		return v.budget != info.budget && v.budget != 0 && info.budget != 0
	case isWinningSubtype(info.subtype), isContractSubtype(info.subtype):
		amountDiffers := isBidWinningAmount(v.bidamount, info.bidamount) &&
			v.bidamount != 0 && info.bidamount != 0
		winnerDiffers := deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) &&
			v.winner != "" && info.winner != ""
		return amountDiffers || winnerDiffers
	}
	return false
}

// deleteExtraSpace normalizes whitespace in a winner name: every tab becomes
// a space and every run of two or more whitespace characters is reduced to
// its first character.
func deleteExtraSpace(s string) string {
	s = strings.Replace(s, "\t", " ", -1)
	return extraSpaceReg.ReplaceAllStringFunc(s, func(run string) string {
		return run[:1] // \s only matches single-byte ASCII whitespace
	})
}

// isBidWinningAmount reports whether two bid amounts DIFFER. Amounts that are
// equal, or that differ by exactly a factor of 10000, count as the same and
// yield false.
func isBidWinningAmount(f1 float64, f2 float64) bool {
	if f1 == f2 || f1*10000 == f2 || f2*10000 == f1 {
		return false
	}
	return true
}