12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337 |
- package main
- import (
- "fmt"
- "log"
- "math"
- qutil "qfw/util"
- "regexp"
- "strings"
- "sync"
- "time"
- )
// Info is the in-memory form of one source document as built by NewInfo.
// The duplicate-detection rules in this file compare two Info values field
// by field.
type Info struct {
	id               string                 // document id
	title            string                 // title
	area             string                 // province
	city             string                 // city
	subtype          string                 // information type
	buyer            string                 // purchasing unit
	agency           string                 // agency
	winner           string                 // winning bidder
	budget           float64                // budget amount
	bidamount        float64                // winning bid amount
	projectname      string                 // project name
	projectcode      string                 // project code
	contractnumber   string                 // contract number
	publishtime      int64                  // publish time
	comeintime       int64                  // time the record entered the store
	bidopentime      int64                  // bid opening time
	bidopenaddress   string                 // bid opening address
	site             string                 // site
	href             string                 // URL of the body text
	repeatid         string                 // id of the record this one repeats
	titleSpecialWord bool                   // title contains a special word
	specialWord      bool                   // special word used for the second-pass check
	mergemap         map[string]interface{} // merge record
	is_site          bool                   // whether area/city were taken from the site
}
// Package-level state shared by the dedup routines.
var (
	// datelimit is the time window, in seconds, applied while loading the
	// pool. It starts at five days and is overwritten by TimedTaskDatamap
	// and NewDatamap.
	datelimit = float64(432000)

	// sitelock is taken around lookups in SiteMap.
	sitelock sync.Mutex
)
// datamap is the pool used for general duplicate detection.
type datamap struct {
	lock     sync.Mutex         // lock taken by check/replaceSourceData around pool access
	days     int                // how many days of data to keep
	data     map[string][]*Info // records grouped under "<date>_<subtype>_<area>" keys
	keymap   []string           // date keys currently retained; recomputed by update
	areakeys []string           // distinct area values seen in the pool
	keys     map[string]bool    // set of date keys present in the pool
}
- func TimedTaskDatamap(days int,lasttime int64) *datamap {
- log.Println("数据池开始重新构建")
- datelimit = qutil.Float64All(days * 86400)
- dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{}, []string{},map[string]bool{}}
- if lasttime <0 {
- log.Println("数据池空数据")
- return dm
- }
- start := int(time.Now().Unix())
- sess := mgo.GetMgoConn()
- defer mgo.DestoryMongoConn(sess)
- query := map[string]interface{}{"publishtime": map[string]interface{}{
- "$lt": lasttime,
- }}
- log.Println("query", query)
- it := sess.DB(mgo.DbName).C(extract_back).Find(query).Sort("-publishtime").Iter()
- n, continuSum := 0, 0
- for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
- //qutil.IntAll(tmp["dataging"]) == 1
- if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1 {
- } else {
- pt := tmp["publishtime"]
- pt_time := qutil.Int64All(pt)
- if qutil.Float64All(lasttime-pt_time) < datelimit {
- continuSum++
- info := NewInfo(tmp)
- dkey := qutil.FormatDateWithObj(&pt, qutil.Date_yyyyMMdd)
- k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
- data := dm.data[k]
- if data == nil {
- data = []*Info{}
- }
- data = append(data, info)
- dm.data[k] = data
- dm.keys[dkey] = true
- //添加省
- isAreaExist :=false
- for _,v:= range dm.areakeys {
- if v==info.area {
- isAreaExist = true
- }
- }
- if !isAreaExist {
- areaArr := dm.areakeys
- areaArr = append(areaArr,info.area)
- dm.areakeys = areaArr
- }
- } else {
- break
- }
- }
- if n%50000 == 0 {
- log.Println("当前数据池:", n, continuSum)
- }
- tmp = make(map[string]interface{})
- }
- log.Printf("数据池构建完成::%d秒,%d个\n", int(time.Now().Unix())-start, n)
- return dm
- }
- func NewDatamap(days int, lastid string) *datamap {
- datelimit = qutil.Float64All(days * 86400 * 2)
- dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{},[]string{}, map[string]bool{}}
- if lastid == "" {
- return dm
- }
- //初始化加载数据
- sess := mgo.GetMgoConn()
- defer mgo.DestoryMongoConn(sess)
- query := map[string]interface{}{"_id": map[string]interface{}{
- "$lte": StringTOBsonId(lastid),
- }}
- log.Println("query", query)
- sortName := "-_id"
- if Is_Sort {
- sortName = "-publishtime"
- }
- it := sess.DB(mgo.DbName).C(extract).Find(query).Sort(sortName).Iter()
- now1 := int64(0)
- n, continuSum := 0, 0
- for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
- if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1{
- } else {
- pt := tmp["comeintime"]
- if Is_Sort {
- pt = tmp["publishtime"]
- }
- pt_time := qutil.Int64All(pt)
- if pt_time <= 0 {
- continue
- }
- if now1 == 0 {
- now1 = pt_time
- }
- if qutil.Float64All(now1-pt_time) < datelimit {
- continuSum++
- info := NewInfo(tmp)
- dkey := qutil.FormatDateWithObj(&pt, qutil.Date_yyyyMMdd)
- k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
- data := dm.data[k]
- if data == nil {
- data = []*Info{}
- }
- data = append(data, info)
- dm.data[k] = data
- dm.keys[dkey] = true
- //添加省
- isAreaExist :=false
- for _,v:= range dm.areakeys {
- if v==info.area {
- isAreaExist = true
- }
- }
- if !isAreaExist {
- areaArr := dm.areakeys
- areaArr = append(areaArr,info.area)
- dm.areakeys = areaArr
- }
- } else {
- break
- }
- }
- if n%5000 == 0 {
- log.Println("当前 n:", n,"数量:" ,continuSum)
- }
- tmp = make(map[string]interface{})
- }
- log.Println("load data:", n,"总数:",continuSum)
- return dm
- }
- func NewInfo(tmp map[string]interface{}) *Info {
- subtype := qutil.ObjToString(tmp["subtype"])
- area := qutil.ObjToString(tmp["area"])
- if area == "A" {
- area = "全国"
- }
- info := &Info{}
- if IdType {
- info.id = qutil.ObjToString(tmp["_id"])
- }else {
- info.id = BsonTOStringId(tmp["_id"])
- }
- info.title = qutil.ObjToString(tmp["title"])
- info.area = area
- info.subtype = subtype
- info.buyer = qutil.ObjToString(tmp["buyer"])
- info.projectname = qutil.ObjToString(tmp["projectname"])
- info.contractnumber = qutil.ObjToString(tmp["contractnumber"])
- info.projectcode = qutil.ObjToString(tmp["projectcode"])
- info.city = qutil.ObjToString(tmp["city"])
- info.agency = qutil.ObjToString(tmp["agency"])
- info.winner = qutil.ObjToString(tmp["winner"])
- info.budget = qutil.Float64All(tmp["budget"])
- info.bidamount = qutil.Float64All(tmp["bidamount"])
- info.publishtime = qutil.Int64All(tmp["publishtime"])
- info.comeintime = qutil.Int64All(tmp["comeintime"])
- info.bidopentime = qutil.Int64All(tmp["bidopentime"])
- info.bidopenaddress = qutil.ObjToString(tmp["bidopenaddress"])
- info.site = qutil.ObjToString(tmp["site"])
- info.href = qutil.ObjToString(tmp["href"])
- info.repeatid = qutil.ObjToString(tmp["repeatid"])
- info.specialWord = FilterRegTitle.MatchString(info.title)
- info.titleSpecialWord = FilterRegTitle_0.MatchString(info.title) ||FilterRegTitle_1.MatchString(info.title) || FilterRegTitle_2.MatchString(info.title)
- info.mergemap = *qutil.ObjToMap(tmp["merge_map"])
- if info.mergemap == nil {
- info.mergemap = make(map[string]interface{}, 0)
- }
- info.is_site = false
- return info
- }
- //判重方法
- func (d *datamap) check(info *Info) (b bool, source *Info, reasons string) {
- reason := ""
- keys := []string{}
- d.lock.Lock()
- for k, _ := range d.keys { //不同时间段
- if info.area=="全国" {
- //匹配所有省
- for _,v := range d.areakeys{
- keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, v))
- }
- }else {
- //匹配指定省
- keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area))
- }
- keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
- }
- d.lock.Unlock()
- L:
- for _, k := range keys {
- d.lock.Lock()
- data := d.data[k]
- d.lock.Unlock()
- if len(data) > 0 { //对比v 找到同类型,同省或全国的数据作对比
- for _, v := range data {
- reason = ""
- if v.id == info.id { //正常重复
- return false, v, ""
- }
- //buyer 优先级高,有值且不相等过滤
- if info.buyer!=""&&v.buyer!=""&&info.buyer!=v.buyer {
- continue
- }
- if info.site != "" {//站点临时赋值
- sitelock.Lock()
- dict := SiteMap[info.site]
- sitelock.Unlock()
- if dict != nil {
- if (info.area == "全国" && dict["area"] != "")||
- (info.city == "" && dict["city"] != ""){
- info.is_site = true
- info.area = qutil.ObjToString(dict["area"])
- info.city = qutil.ObjToString(dict["city"])
- }
- }
- }
- //前置条件 - 站点相关
- if info.site != "" && info.site == v.site {
- if info.href != "" && info.href == v.href {
- reason = "同站点-href相同"
- b = true
- source = v
- reasons = reason
- break L
- }
- if info.href != "" && info.href != v.href {
- if v.title==info.title&&len([]rune(info.title)) >10 && isTheSameDay(info.publishtime,v.publishtime){
- if !againHrefRepeat(v, info) {//进行同站点二次判断
- reason = "同站点-href不同-标题相同等"
- b = true
- source = v
- reasons = reason
- break L
- }else {
- continue
- }
- }else {
- continue
- }
- }
- }
- specialNum:= dealWithSpecialWordNumber(info,v)
- //前置条件 - 标题相关,有且一个关键词
- if specialNum==1 {
- if info.title != v.title && v.title != "" && info.title != "" {
- continue
- }
- }
- //前置条件3 - 标题相关,均含有关键词
- if specialNum==2 {
- if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
- v.title != "" && info.title != "" {
- letter1,letter2:=v.title,info.title
- res, _ := regexp.Compile("[0-9a-zA-Z]+");
- if res.MatchString(letter1)||res.MatchString(letter2) {
- letter1=convertArabicNumeralsAndLetters(letter1)
- letter2=convertArabicNumeralsAndLetters(letter2)
- }
- if strings.Contains(letter1,"重新招标")|| strings.Contains(letter2,"重新招标"){
- letter1,letter2=dealWithSpecialPhrases(letter1,letter2)
- }
- if letter1==letter2 {
- reason = reason + "标题关键词相等关系"
- if !againRepeat(v, info) {//进行二级金额判断
- b = true
- source = v
- reasons = reason
- break L
- }
- }else {
- if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
- //无包含关系-即不相等
- continue
- }
- }
- }
- }
- //前置条件-五要素均相等
- if leadingElementSame(v,info) {
- reason = "五要素-相同-满足"
- b = true
- source = v
- reasons = reason
- break L
- }
- //新增快速数据过少判重
- if LowHeavy {
- repeat := false
- if repeat, reason = fastLowQualityHeavy(v, info, reason); repeat {
- b = true
- source = v
- reasons = reason
- break L
- }
- }
- //代理机构相同-非空相等
- if v.agency != "" && info.agency != "" && v.agency == info.agency {
- reason = reason + "同机构-"
- repeat := false
- if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
- b = true
- source = v
- reasons = reason
- break L
- }
- } else {
- reason = reason + "非同机构-"
- if info.city != "" && info.city == v.city {
- reason = reason + "同城-"
- repeat := false
- if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
- b = true
- source = v
- reasons = reason
- break L
- }
- } else {
- reason = reason + "不同城-"
- repeat := false
- if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
- b = true
- source = v
- reasons = reason
- break L
- }
- }
- }
- }
- }
- }
- //往预存数据 d 添加
- if !b {
- ct := info.comeintime
- if Is_Sort ||TimingTask{
- ct = info.publishtime
- }
- dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
- k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
- d.lock.Lock()
- data := d.data[k]
- if data == nil {
- data = []*Info{info}
- d.data[k] = data
- if !d.keys[dkey] {
- d.keys[dkey] = true
- d.update(ct)
- }
- } else {
- data = append(data, info)
- d.data[k] = data
- }
- //添加省
- isAreaExist :=false
- for _,v:= range d.areakeys {
- if v==info.area {
- isAreaExist = true
- }
- }
- if !isAreaExist {
- areaArr := d.areakeys
- areaArr = append(areaArr,info.area)
- d.areakeys = areaArr
- }
- d.lock.Unlock()
- }
- return
- }
- //替换原始数据池
- func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
- //删除数据池的老数据
- ct_old := oldData.comeintime
- if Is_Sort||TimingTask {
- ct_old = oldData.publishtime
- }
- dkey_old := qutil.FormatDateByInt64(&ct_old, qutil.Date_yyyyMMdd)
- k_old := fmt.Sprintf("%s_%s_%s", dkey_old, oldData.subtype, oldData.area)
- data_old := d.data[k_old]
- for k, v := range data_old {
- if v.id == oldData.id {//删除对应当前的老数据
- data_old = append(data_old[:k], data_old[k+1:]...)
- break
- }
- }
- d.data[k_old] = data_old
- //添加新的
- ct := newData.comeintime
- if Is_Sort ||TimingTask{
- ct = newData.publishtime
- }
- dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
- k := fmt.Sprintf("%s_%s_%s", dkey, newData.subtype, newData.area)
- d.lock.Lock()
- data := d.data[k]
- if data == nil {
- data = []*Info{newData}
- d.data[k] = data
- if !d.keys[dkey] {
- d.keys[dkey] = true
- d.update(ct)
- }
- } else {
- data = append(data, newData)
- d.data[k] = data
- }
- //添加省
- isAreaExist :=false
- for _,v:= range d.areakeys {
- if v==newData.area {
- isAreaExist = true
- }
- }
- if !isAreaExist {
- areaArr := d.areakeys
- areaArr = append(areaArr,newData.area)
- d.areakeys = areaArr
- }
- d.lock.Unlock()
- }
- func (d *datamap) update(t int64) {
- if TimingTask {
- d.keymap = d.GetLatelyFiveDay(t)
- }else {
- //d.keymap = d.GetLatelyFiveDay(t)//测试数据采用
- d.keymap = d.GetLatelyFiveDayDouble(t)
- }
- m := map[string]bool{}
- for _, v := range d.keymap {
- m[v] = true
- }
- all, all1 := 0, 0
- for k, v := range d.data {
- all += len(v)
- if !m[k[:8]] {
- delete(d.data, k)
- }
- }
- for k, _ := range d.keys {
- if !m[k] {
- delete(d.keys, k)
- }
- }
- for _, v := range d.data {
- all1 += len(v)
- }
- //log.Println("更新前后数据:", all, all1)
- }
- func (d *datamap) GetLatelyFiveDay(t int64) []string {
- array := make([]string, d.days)
- now := time.Unix(t, 0)
- for i := 0; i < d.days; i++ {
- array[i] = now.Format(qutil.Date_yyyyMMdd)
- now = now.AddDate(0, 0, -1)
- }
- return array
- }
- func (d *datamap) GetLatelyFiveDayDouble(t int64) []string {//增量-两倍
- array := make([]string, d.days*2)
- now := time.Now()
- for i := 0; i < d.days*2; i++ {
- array[i] = now.Format(qutil.Date_yyyyMMdd)
- now = now.AddDate(0, 0, -1)
- }
- return array
- }
- /*
- **************************
- ******** 以下为判重 ********
- **************************
- */
-
// convertArabicNumeralsAndLetters normalises a title before comparison:
// ASCII letters are upper-cased and ASCII digits 0-9 are replaced by the
// Chinese numerals 零一二三四五六七八九. All other bytes are copied unchanged.
//
// It is called for every compared pair of titles, so it does a single pass
// over the bytes instead of compiling regular expressions on each call.
// Working on bytes is safe because ASCII bytes never occur inside a
// multi-byte UTF-8 sequence.
func convertArabicNumeralsAndLetters(data string) string {
	chineseDigits := [10]string{"零", "一", "二", "三", "四", "五", "六", "七", "八", "九"}
	var b strings.Builder
	b.Grow(len(data))
	for i := 0; i < len(data); i++ {
		c := data[i]
		switch {
		case c >= 'a' && c <= 'z':
			b.WriteByte(c - 'a' + 'A')
		case c >= '0' && c <= '9':
			b.WriteString(chineseDigits[c-'0'])
		default:
			b.WriteByte(c)
		}
	}
	return b.String()
}
// dealWithSpecialPhrases replaces every occurrence of "重新招标" with its
// short form "重招" in both strings, so that titles using either wording
// compare equal. The phrase is a plain literal, so strings.Replace is used
// rather than compiling a regular expression on every call.
func dealWithSpecialPhrases(str1 string, str2 string) (string, string) {
	const long, short = "重新招标", "重招"
	return strings.Replace(str1, long, short, -1), strings.Replace(str2, long, short, -1)
}
- //关键词数量v
- func dealWithSpecialWordNumber(info*Info,v*Info) int {
- okNum:=0
- if info.titleSpecialWord || info.specialWord {
- okNum++
- }
- if v.titleSpecialWord || v.specialWord {
- okNum++
- }
- return okNum
- }
-
- //快速低质量数据判重
- func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
- //首先判定是否为低质量数据 info目标数据
- if info.agency==v.agency&&info.title!=""&&
- info.title==v.title &&
- info.projectname==""&&info.projectcode==""&&info.contractnumber==""&&info.buyer=="" {
- isValue:=0//五要素判断
- if info.budget != 0 {//预算
- isValue++
- }
- if info.bidopentime != 0{//开标时间
- isValue++
- }
- if info.bidopenaddress!=""{//开标地点
- isValue++
- }
- if info.winner != ""{//中标单位
- isValue++
- }
- if info.bidamount != 0 {//中标金额
- isValue++
- }
- if isValue==0 {
- reason = reason + "---低质量-要素均为空,标题包含关系"
- return true, reason
- }else if isValue==1 {
- isMeet := false
- if isMeet, reason = judgeLowQualityData(v, info, reason); isMeet {
- reason = reason + "---低质量-有且一个要素组合"
- return true, reason
- }
- }else {
- }
- }
- return false,reason
- }
- //类别细节原因记录
- func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
- if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
- info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
- info.subtype == "变更" || info.subtype == "其他" {
- //招标结果
- if info.budget != 0 && info.budget == v.budget{//预算
- reason = reason + "---招标类:预算"
- return true,reason
- }
- if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//开标时间
- reason = reason + "---招标类:开标时间"
- return true,reason
- }
- if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
- reason = reason + "---招标类:开标地点"
- return true,reason
- }
- } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
- //中标结果
- if v.winner != "" && info.winner == v.winner{//中标单位
- reason = reason + "---中标类:中标单位"
- return true,reason
- }
- if v.bidamount != 0 && info.bidamount == v.bidamount{//中标金额
- reason = reason + "---中标类:中标金额"
- return true,reason
- }
- } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
- //合同
- if info.budget != 0 && info.budget == v.budget{//预算
- reason = reason + "---合同类:预算"
- return true,reason
- }
- if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//开标时间
- reason = reason + "---合同类:开标时间"
- return true,reason
- }
- if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
- reason = reason + "---合同类:开标地点"
- return true,reason
- }
- if v.winner != "" && info.winner == v.winner{//中标单位
- reason = reason + "---合同类:中标单位"
- return true,reason
- }
- if v.bidamount != 0 && info.bidamount == v.bidamount{//中标金额
- reason = reason + "---合同类:中标金额"
- return true,reason
- }
- } else {
- //招标结果
- if info.budget != 0 && info.budget == v.budget{//预算
- reason = reason + "---类别空-招标类:预算"
- return true,reason
- }
- if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//开标时间
- reason = reason + "---类别空-招标类:开标时间"
- return true,reason
- }
- if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
- reason = reason + "---类别空-招标类:开标地点"
- return true,reason
- }
- }
- return false,reason
- }
- //判重方法1
- func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
- isMeet := false
- if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
- info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
- info.subtype == "变更" || info.subtype == "其他" {
- //招标结果
- if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
- if tenderRepeat_C(v, info) {
- return false, reason
- } else {
- reason = reason + "---招标类"
- return true, reason
- }
- } else {
- return false, reason
- }
- } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
- //中标结果
- if isMeet, reason = winningRepeat_A(v, info, reason); isMeet {
- if winningRepeat_C(v, info) {
- return false, reason
- } else {
- reason = reason + "---中标类"
- return true, reason
- }
- } else {
- return false, reason
- }
- } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
- //合同
- if isMeet, reason = contractRepeat_A(v, info, reason); isMeet {
- if contractRepeat_C(v, info) {
- return false, reason
- } else {
- reason = reason + "---合同类"
- return true, reason
- }
- } else {
- return false, reason
- }
- } else {
- //招标结果
- if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
- if tenderRepeat_C(v, info) {
- return false, reason
- } else {
- reason = reason + "---类别空-招标类"
- return true, reason
- }
- } else {
- return false, reason
- }
- }
- return false, reason
- }
- //判重方法2
- func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
- isMeet := false
- if v.agency == info.agency && v.agency != "" && info.agency != "" {
- if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
- info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
- info.subtype == "变更" || info.subtype == "其他" {
- //招标结果
- if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
- if tenderRepeat_C(v, info) { //有不同
- return false, reason
- } else {
- reason = reason + "---招标类"
- return true, reason
- }
- } else {
- return false, reason
- }
- } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
- //中标结果
- if isMeet, reason = winningRepeat_B(v, info, reason); isMeet {
- if winningRepeat_C(v, info) { //有不同
- return false, reason
- } else {
- reason = reason + "---中标类"
- return true, reason
- }
- } else {
- return false, reason
- }
- } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
- //合同
- if isMeet, reason = contractRepeat_B(v, info, reason); isMeet {
- if contractRepeat_C(v, info) { //有不同
- return false, reason
- } else {
- reason = reason + "---合同类"
- return true, reason
- }
- } else {
- return false, reason
- }
- } else {
- //招标结果
- if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
- if tenderRepeat_C(v, info) { //有不同
- return false, reason
- } else {
- reason = reason + "---类别空-招标类"
- return true, reason
- }
- } else {
- return false, reason
- }
- }
- }
- //不同
- if v.agency != info.agency && v.agency != "" && info.agency != "" {
- return false, reason
- }
- //机构最少一个为空
- if v.agency == "" || info.agency == "" {
- var repeat = false
- if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
- reason = reason + "---机构最少一个空"
- return true, reason
- } else {
- return false, reason
- }
- }
- return false, reason
- }
- //招标_A
- func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
- var ss string
- p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false
- if v.projectname != "" && v.projectname == info.projectname {
- ss = ss + "p1-名称-"
- p1 = true
- }
- if v.buyer != "" && v.buyer == info.buyer {
- ss = ss + "p2-单位-"
- p2 = true
- }
- if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
- (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
- ss = ss + "p3-编号组-"
- p3 = true
- }
- if v.budget != 0 && v.budget == info.budget {
- ss = ss + "p4-预算-"
- p4 = true
- }
- if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
- ss = ss + "p9-开标时间相同-"
- p9 = true
- }
- if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
- ss = ss + "p10-开标地点-"
- p10 = true
- }
- if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
- (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
- ss = ss + "p11-标题-"
- p11 = true
- }
- if (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) || (p1 && p2 && p10) ||
- (p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) ||
- (p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) ||
- (p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) ||
- (p2 && p4 && p9) || (p2 && p4 && p10) || (p2 && p4 && p11) ||
- (p3 && p4 && p9) || (p3 && p4 && p10) || (p3 && p4 && p11) ||
- (p4 && p9 && p10) || (p4 && p9 && p11) || (p9 && p10 && p11) {
- reason = reason + "满足招标A,3要素组合-" + ss + ","
- return true, reason
- }
- return false, reason
- }
- //招标_B
- func tenderRepeat_B(v *Info, info *Info, reason string) (bool, string) {
- m, n := 0, 0
- if v.projectname != "" && v.projectname == info.projectname {
- m++
- n++
- }
- if v.buyer != "" && v.buyer == info.buyer {
- m++
- }
- if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
- (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
- m++
- }
- if v.budget != 0 && v.budget == info.budget {
- m++
- }
- if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
- m++
- }
- //if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
- // m++
- //}
- if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
- (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
- m++
- n++
- }
- if m >= 2 {
- if n == 2 && m == 2 {
- return false, reason
- } else {
- reason = reason + "满足招标B,六选二,"
- return true, reason
- }
- }
- return false, reason
- }
- //招标_C
- func tenderRepeat_C(v *Info, info *Info) bool {
- if v.budget != 0 && info.budget != 0 && v.budget != info.budget {
- return true
- }
- //原始地址...
- //if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
- // return true
- //}
- if v.bidopentime != 0 && info.bidopentime != 0 && isBidopentimeInterval(info.bidopentime,v.bidopentime) {
- return true
- }
- //if v.bidopenaddress != "" && info.bidopenaddress != "" && v.bidopenaddress != info.bidopenaddress {
- // return true
- //}
- return false
- }
- //中标_A
- func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
- var ss string
- p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false
- if v.projectname != "" && v.projectname == info.projectname {
- ss = ss + "p1-项目名称-"
- p1 = true
- }
- if v.buyer != "" && v.buyer == info.buyer {
- ss = ss + "p2-单位-"
- p2 = true
- }
- if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
- (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
- ss = ss + "p3-编号组--"
- p3 = true
- }
- if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
- ss = ss + "p5-中标金-"
- p5 = true
- }
- if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
- ss = ss + "p6-中标人-"
- p6 = true
- }
- if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
- (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
- ss = ss + "p11-标题-"
- p11 = true
- }
- if (p1 && p2 && p3) || (p1 && p2 && p5) || (p1 && p2 && p6) ||
- (p1 && p3 && p5) || (p1 && p3 && p6) || (p1 && p5 && p6) ||
- (p2 && p3 && p5) || (p2 && p3 && p6) || (p2 && p3 && p11) ||
- (p2 && p5 && p6) || (p2 && p5 && p11) || (p2 && p6 && p11) ||
- (p3 && p5 && p6) || (p3 && p5 && p11) || (p3 && p6 && p11) ||
- (p5 && p6 && p11) {
- reason = reason + "满足中标A,3要素组合-" + ss + ","
- return true, reason
- }
- return false, reason
- }
- //中标_B
- func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) {
- m, n := 0, 0
- if v.projectname != "" && v.projectname == info.projectname {
- m++
- n++
- }
- if v.buyer != "" && v.buyer == info.buyer {
- m++
- }
- if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
- (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
- m++
- }
- if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
- m++
- }
- if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
- m++
- }
- if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
- (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
- m++
- n++
- }
- if m >= 2 {
- if n == 2 && m == 2 {
- return false, reason
- } else {
- reason = reason + "满足中标B.六选二,"
- return true, reason
- }
- }
- return false, reason
- }
- //中标_C
- func winningRepeat_C(v *Info, info *Info) bool {
- if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount,info.bidamount) {
- return true
- }
- //
- if v.winner != "" && info.winner != "" && deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) {
- return true
- }
- //原始地址...
- //if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
- // return true
- //}
- return false
- }
- //合同_A
- func contractRepeat_A(v *Info, info *Info, reason string) (bool, string) {
- isMeet_1 := false
- if isMeet_1, reason = tenderRepeat_A(v, info, reason); isMeet_1 {
- return true, reason
- }
- isMeet_2 := false
- if isMeet_2, reason = winningRepeat_A(v, info, reason); isMeet_2 {
- return true, reason
- }
- return false, reason
- }
- //合同_B
- func contractRepeat_B(v *Info, info *Info, reason string) (bool, string) {
- isMeet_1 := false
- if isMeet_1, reason = tenderRepeat_B(v, info, reason); isMeet_1 {
- return true, reason
- }
- isMeet_2 := false
- if isMeet_2, reason = winningRepeat_B(v, info, reason); isMeet_2 {
- return true, reason
- }
- return false, reason
- }
- //合同_C
- func contractRepeat_C(v *Info, info *Info) bool {
- if tenderRepeat_C(v, info) {
- return true
- }
- if winningRepeat_C(v, info) {
- return true
- }
- //合同类 - 新增编号
- if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
- return true
- }
- if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
- return true
- }
- return false
- }
- //同站点再次判断
- func againHrefRepeat(v *Info, info *Info) bool {
- //if v.buyer == info.buyer {
- //
- //}
- if isBidopentimeInterval(info.bidopentime,v.bidopentime) {
- return true
- }
- if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
- return true
- }
- if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{
- return true
- }
- if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" {
- return true
- }
- if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
- return true
- }
- if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
- return true
- }
- //if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
- // info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
- // info.subtype == "变更" || info.subtype == "其他" {
- // //招标结果
- // if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
- // return true
- // }
- //} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
- // //中标结果
- // if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
- // (deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
- // return true
- // }
- //} else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
- // //合同
- // if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
- // return true
- // }
- // if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
- // (deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
- // return true
- // }
- //
- //} else {
- // if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
- // return true
- // }
- //}
- return false
- }
- //关键词再次判断
- func againRepeat(v *Info, info *Info) bool {
- //相同采购单位下
- //if info.buyer != "" && v.buyer == info.buyer {
- //if v.buyer == info.buyer {
- //
- //}
- //if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
- // info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
- // info.subtype == "其他" || info.subtype == "变更" {
- // //预算金额满足条件
- // if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
- // return true
- // }
- //} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" ||
- // info.subtype == "流标" || info.subtype == "合同" || info.subtype == "验收" ||
- // info.subtype == "违规" {
- // //中标金额单位满足条件
- // if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
- // (deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
- // return true
- // }
- //} else {
- // //预算金额满足条件
- // if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
- // return true
- // }
- //}
- if isBidopentimeInterval(info.bidopentime,v.bidopentime) {
- return true
- }
- if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
- return true
- }
- if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{
- return true
- }
- if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" {
- return true
- }
- if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
- return true
- }
- if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
- return true
- }
- return false
- }
// extraSpaceRe matches a run of two or more whitespace characters. It is
// compiled once at package initialisation instead of on every call.
var extraSpaceRe = regexp.MustCompile(`\s{2,}`)

// deleteExtraSpace normalises whitespace in s (used for winner names) so that
// two strings differing only in spacing compare equal.
//
// Every tab is first turned into a single space; then each run of two or more
// whitespace characters is collapsed to the first character of that run.
// Single whitespace characters, including leading or trailing ones, are left
// in place. The empty string is returned unchanged.
func deleteExtraSpace(s string) string {
	// Use the explicit "\t" escape: a literal whitespace character inside the
	// quotes is indistinguishable from a space and turns this into a no-op.
	s = strings.Replace(s, "\t", " ", -1)

	// \s only matches ASCII whitespace, so the first character of a run is
	// always exactly one byte.
	return extraSpaceRe.ReplaceAllStringFunc(s, func(run string) string {
		return run[:1]
	})
}
// amountsAlmostEqual reports whether a and b are equal up to floating-point
// rounding, using a tight relative tolerance. Non-finite differences never
// count as equal.
func amountsAlmostEqual(a, b float64) bool {
	if a == b {
		return true
	}
	const relTol = 1e-12
	diff := math.Abs(a - b)
	scale := math.Max(math.Abs(a), math.Abs(b))
	// Strict '<' so that an infinite diff against an infinite scale is not
	// treated as a match.
	return diff < scale*relTol
}

// isBidWinningAmount reports whether two bid amounts DIFFER.
//
// Despite its name, it returns false when the amounts are considered the same
// and true otherwise. Two amounts are considered the same when they are equal
// or when one is 10000 times the other (the same sum written with a 10000x
// unit multiplier).
//
// The comparison tolerates floating-point rounding: with exact ==, a pair
// such as 0.07 and 700 is reported as different because 0.07*10000 is not
// exactly 700 in float64.
func isBidWinningAmount(f1 float64, f2 float64) bool {
	same := amountsAlmostEqual(f1, f2) ||
		amountsAlmostEqual(f1*10000, f2) ||
		amountsAlmostEqual(f2*10000, f1)
	return !same
}
- //开标时间区间为一天
- func isBidopentimeInterval(i1 int64 ,i2 int64) bool {
- if i1==0||i2==0 {
- return false
- }
- //不在同一天-或者同一天间隔超过六小时,属于不相等返回true
- timeOne,timeTwo:=i1,i2
- day1 := qutil.FormatDateByInt64(&timeOne, qutil.Date_yyyyMMdd)
- day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
- if day1==day2 {
- //是否间隔超过六小时
- if math.Abs(float64(i1-i2)) >21600.0 {
- return true
- }else {
- return false
- }
- }else {
- return true
- }
- }
- //开标时间区间为一天
- func isTheSameDay(i1 int64 ,i2 int64) bool {
- if i1==0||i2==0 {
- return false
- }
- timeOne,timeTwo:=i1,i2
- day1 := qutil.FormatDateByInt64(&timeOne, qutil.Date_yyyyMMdd)
- day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
- if day1==day2 {
- return true
- }
- return false
- }
- //前置0 五要素均相等认为重复
- func leadingElementSame(v *Info, info *Info) bool {
- isok:= 0
- if info.projectname != "" && v.projectname == info.projectname {
- isok++
- }
- if info.buyer != "" && v.buyer == info.buyer {
- isok++
- }
- if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
- if info.contractnumber != "" && v.contractnumber == info.contractnumber {
- isok++
- }
- }else {
- if info.projectcode != "" && v.projectcode == info.projectcode {
- isok++
- }
- }
- if info.title != "" && v.title == info.title {
- isok++
- }
- if v.agency == info.agency {
- isok++
- }
- if isok==5 {
- return true
- }
- return false
- }
|