123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454 |
- package main
- import (
- "fmt"
- "log"
- "math"
- qutil "qfw/util"
- "qfw/util/mongodb"
- "strconv"
- "strings"
- "sync"
- "time"
- )
// Info holds the fields of one announcement record that the duplicate
// detection in this file reads and compares.
type Info struct {
	id          string
	title       string
	area        string
	city        string
	subtype     string
	buyer       string
	agency      string // agency
	winner      string // winning bidder
	projectname string
	projectcode string
	publishtime int64
	comeintime  int64
	bidopentime int64  // bid opening time
	agencyaddr  string // bid opening location
	detail      string // tender content
	site        string // source site

	// ContainSpecialWord is filled in by NewInfo from a regexp match on the
	// title.
	ContainSpecialWord bool
}
var (
	// datelimit is the largest time gap, in seconds, still considered when
	// comparing records. It starts at 432000 (five days) and is overwritten
	// by NewDatamap with days*86400.
	datelimit float64 = 432000

	// mm is not referenced in the visible part of this file.
	mm int
)
- type datamap struct {
- lock sync.Mutex //锁
- days int //保留几天数据
- data map[string][]*Info
- keymap []string
- keys map[string]bool
- }
- func NewDatamap(days int, lastid string) *datamap {
- datelimit = qutil.Float64All(days * 86400)
- dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{}, map[string]bool{}}
- if lastid == "" {
- return dm
- }
- //初始化加载数据
- sess := mgo.GetMgoConn()
- defer mgo.DestoryMongoConn(sess)
- it := sess.DB(mgo.DbName).C(extract).Find(mongodb.ObjToMQ(`{"_id":{"$lte":"`+lastid+`"}}`, true)).Sort("-_id").Iter()
- now1 := int64(0)
- n, continuSum := 0, 0
- for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
- //|| qutil.ObjToString(tmp["subtype"]) == "变更" //变更的数据打开
- if qutil.IntAll(tmp["repeat"]) == 1 {
- continuSum++
- } else {
- cm := tmp["comeintime"] //时间单位
- //cm := tmp["publishtime"]
- comeintime := qutil.Int64All(cm)
- if comeintime == 0 {
- id := qutil.BsonIdToSId(tmp["_id"])[0:8]
- comeintime, _ = strconv.ParseInt(id, 16, 64)
- }
- if now1 == 0 {
- now1 = comeintime
- }
- if qutil.Float64All(now1-comeintime) < datelimit {
- info := NewInfo(tmp)
- dkey := qutil.FormatDateWithObj(&cm, qutil.Date_yyyyMMdd)
- k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
- data := dm.data[k]
- if data == nil {
- data = []*Info{}
- //log.Println(k)
- }
- data = append(data, info)
- dm.data[k] = data
- dm.keys[dkey] = true
- } else {
- break
- }
- }
- if n%5000 == 0 {
- log.Println("current n:", n, continuSum)
- }
- tmp = make(map[string]interface{})
- }
- log.Println("load data:", n)
- return dm
- }
- func NewInfo(tmp map[string]interface{}) *Info {
- subtype := qutil.ObjToString(tmp["subtype"])
- area := qutil.ObjToString(tmp["area"])
- if area == "A" {
- area = "全国"
- }
- info := &Info{}
- info.id = qutil.BsonIdToSId(tmp["_id"])
- info.title = qutil.ObjToString(tmp["title"])
- info.area = area
- info.subtype = subtype
- info.buyer = qutil.ObjToString(tmp["buyer"])
- info.projectname = qutil.ObjToString(tmp["projectname"])
- //info.ContainSpecialWord = FilterRegexp.MatchString(info.projectname) || FilterRegexp.MatchString(info.title)
- info.ContainSpecialWord = FilterRegTitle.MatchString(info.title)
- info.projectcode = qutil.ObjToString(tmp["projectcode"])
- info.city = qutil.ObjToString(tmp["city"])
- info.agency = qutil.ObjToString(tmp["agency"])
- //info.winner = qutil.ObjToString(tmp["winner"])
- info.publishtime = qutil.Int64All(tmp["publishtime"])
- info.bidopentime = qutil.Int64All(tmp["bidopentime"])
- info.agencyaddr = qutil.ObjToString(tmp["agencyaddr"])
- info.detail = qutil.ObjToString(tmp["detail"])
- info.site = qutil.ObjToString(tmp["site"])
- return info
- }
- func (d *datamap) check(info *Info) (b bool, id string) {
- d.lock.Lock()
- defer d.lock.Unlock()
- keys := []string{}
- for k, _ := range d.keys {
- keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area))
- if info.area != "全国" { //这个后续可以不要
- keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
- }
- }
- L:
- for _, k := range keys {
- data := d.data[k]
- if len(data) > 0 { //对比
- for _, v := range data {
- //正常重复
- if v.id == info.id {
- return false, v.id
- }
- if math.Abs(qutil.Float64All(v.publishtime-info.publishtime)) > datelimit {
- continue
- }
- if v.agency != "" && info.agency != "" && v.agency != info.agency {
- continue
- }
- if info.subtype==v.subtype {
- if info.subtype == "变更" {
- //以下为新增方法 , 变更数据判重处理 v为原数据 info为目标数据
- if info.publishtime<v.publishtime{
- continue
- }
- if info.ContainSpecialWord&&info.title!=v.title&&v.title!="" {
- continue
- }
- if v.projectcode != info.projectcode&&len([]rune(info.projectcode)) >=10&&v.projectcode!=""{
- continue
- }
- //同城判定有效
- first_judge:= false
- if (v.projectcode != ""&&v.projectcode==info.projectcode&&v.projectname != ""&&v.projectname==info.projectname)||
- (v.title != ""&&v.title==info.title&&v.bidopentime != 0&&v.bidopentime==info.bidopentime&&v.detail != ""&&v.detail==info.detail) {
- first_judge = true
- }
- //3/6等判断
- n := 0
- if v.title != "" && v.title == info.title {
- n++
- }
- if v.projectname != "" && v.projectname == info.projectname {
- n++
- }
- if v.projectcode != "" && v.projectcode == info.projectcode {
- n++
- }
- if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
- n++
- }
- if v.agencyaddr != "" && v.agencyaddr == info.agencyaddr {
- n++
- }
- if v.detail != "" && v.detail == info.detail {
- n++
- }
- t:= judgeCityType(v.area,info.area,v.city,info.city)
- if n>=3||first_judge==true {
- if t==2 {
- //同城
- b = true
- id = v.id
- log.Print("同城满足的",info.id)
- break L
- }
- }
- }else {//非变更数据判重处理
- n:=0 //三要素
- m:=0 //二要素
- x:=0 //四要素
- if info.buyer != "" &&v.buyer == info.buyer {
- n++
- x++
- }
- if info.projectname != ""&&v.projectname == info.projectname {
- n++
- m++
- x++
- }
- if info.projectcode != ""&&v.projectcode == info.projectcode {
- n++
- m++
- x++
- }
- if info.title != ""&&v.title == info.title {
- x++
- }
- t:= judgeCityType(v.area,info.area,v.city,info.city)
- c_1 :=conditionTitle(v.title,info.title) //标题满足
- c_2 :=conditionNum(v.projectcode,info.projectcode) //编号满足
- c_3 :=conditionTAB(v.title,info.title,v.buyer,info.buyer) //标题+采购单位
- //同站点判断
- if info.site != "" && v.site == info.site {
- if n>1||c_1||c_2 {
- b = true
- id = v.id
- log.Println("站点满足过滤")
- break L
- }
- }else {
- if info.ContainSpecialWord&&info.title!=v.title&&v.title!="" {
- continue
- }
- if v.projectcode != info.projectcode&&len([]rune(info.projectcode)) >=10&&v.projectcode!=""{
- continue
- }
- //先决条件满足三要素
- if n==3{
- b = true
- id = v.id
- break L
- }
- //城市判断
- if t==0||t==1 { //最少一个全国
- if c_1 && (c_2||n>1) {
- b = true
- id = v.id
- break L
- }
- if c_2&&x>2{
- b = true
- id = v.id
- break L
- }
- }else if t==2 { // 省-市
- if c_1||c_2||n>1 {
- b = true
- id = v.id
- break L
- }
- }else if t==3 {// !省 !市
- if (c_1&&n>1)||(c_2&&x>2){
- b = true
- id = v.id
- break L
- }
- }else if t==4 {// 省 !市
- if m>1||(c_1&&m>0)||(c_2&&x>1)||(c_3&&n>1){
- b = true
- id = v.id
- break L
- }
- }else {
- }
- }
- }
- }
- ////非变更数据判重处理
- //n := 0
- //if v.buyer != "" && v.buyer == info.buyer {
- // n++
- //}
- //if v.projectname != "" && v.projectname == info.projectname {
- // n++
- //}
- //if !info.ContainSpecialWord && n > 1 {
- // b = true
- // id = v.id
- // break L
- //} else if v.projectcode != "" && v.projectcode == info.projectcode {
- // n++
- //}
- //if !info.ContainSpecialWord && n > 1 || n > 2 {
- // b = true
- // id = v.id
- // break L
- //}
- ////标题长度大于10且相等即为重复
- //// if len([]rune(info.title)) > 10 && v.title == info.title {
- //// b = true
- //// id = v.id
- //// break L
- //// }
- ////标题长度大于10且包含关系+buyer/projectname/projectcode/city(全国/A的只判断包含关系即可)相等即为重复
- //if len([]rune(info.title)) > 10 && len([]rune(v.title)) > 10 && (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
- // if info.area == "全国" || n > 0 || info.city == v.city {
- // b = true
- // id = v.id
- // break L
- // }
- //}
- }
- }
- }
- //往预存数据 d 添加
- if !b {
- ct, _ := strconv.ParseInt(info.id[:8], 16, 64)
- dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
- k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
- data := d.data[k]
- if data == nil {
- data = []*Info{info}
- d.data[k] = data
- if !d.keys[dkey] {
- d.keys[dkey] = true
- d.update(ct)
- }
- } else {
- data = append(data, info)
- d.data[k] = data
- }
- }
- return
- }
// judgeCityType classifies how the locations of two records relate.
// v and info are the two area values, v_c and info_c the two city values.
// A value counts as nationwide when it is "全国" or empty.
//
// Result:
//
//	0 - both areas are nationwide
//	1 - any other case in which an area or a city is nationwide
//	2 - same area and same city
//	3 - different area and different city
//	4 - everything else (exactly one of area / city is equal)
func judgeCityType(v string, info string, v_c string, info_c string) (t int) {
	nationwide := func(s string) bool { return s == "全国" || s == "" }

	switch {
	case nationwide(v) && nationwide(info):
		return 0
	case nationwide(v) || nationwide(info) || nationwide(v_c) || nationwide(info_c):
		return 1
	case v == info && v_c == info_c:
		return 2
	case v != info && v_c != info_c:
		return 3
	default:
		return 4
	}
}
// conditionTitle is condition one (title): it reports whether both titles
// are longer than 10 runes and one of them contains the other.
func conditionTitle(t1 string, t2 string) bool {
	if len([]rune(t1)) <= 10 || len([]rune(t2)) <= 10 {
		return false
	}
	return strings.Contains(t1, t2) || strings.Contains(t2, t1)
}
// conditionNum is condition two (project code): it reports whether the two
// codes are identical and at least 10 runes long.
func conditionNum(c1 string, c2 string) bool {
	if c1 != c2 {
		return false
	}
	return len([]rune(c1)) >= 10
}
// conditionTAB is condition three (buyer + title): it reports whether the
// two titles are equal and the two buyers are equal. Empty strings compare
// equal like any other value.
func conditionTAB(t1 string, t2 string, b1 string, b2 string) bool {
	return t1 == t2 && b1 == b2
}
- func (d *datamap) update(t int64) {
- //每天0点清除历史数据
- d.keymap = d.GetLatelyFiveDay(t)
- m := map[string]bool{}
- for _, v := range d.keymap {
- m[v] = true
- }
- all, all1 := 0, 0
- for k, v := range d.data {
- all += len(v)
- if !m[k[:8]] {
- delete(d.data, k)
- }
- }
- for k, _ := range d.keys {
- if !m[k] {
- delete(d.keys, k)
- }
- }
- for _, v := range d.data {
- all1 += len(v)
- }
- //log.Println("更新前后数据:", all, all1)
- }
- func (d *datamap) GetLatelyFiveDay(t int64) []string {
- array := make([]string, d.days)
- now := time.Unix(t, 0)
- for i := 0; i < d.days; i++ {
- array[i] = now.Format(qutil.Date_yyyyMMdd)
- now = now.AddDate(0, 0, -1)
- }
- return array
- }
|