package main import ( "fmt" "log" "math" qutil "qfw/util" "strings" "sync" "time" ) type Info struct { id string title string area string city string subtype string buyer string agency string //代理机构 winner string //中标单位 projectname string projectcode string publishtime int64 ContainSpecialWord bool } var datelimit = float64(432000) type datamap struct { lock sync.Mutex //锁 days int //保留几天数据 data map[string][]*Info keymap []string } func NewDatamap(days int) *datamap { datelimit = qutil.Float64All(days * 86400) dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{}} dm.keymap = dm.GetLatelyFiveDay() //初始化加载数据 sess := mgo.GetMgoConn() defer mgo.DestoryMongoConn(sess) it := sess.DB(mgo.DbName).C(extract).Find(nil).Sort("-_id").Iter() now1 := time.Now().Unix() n, continuSum := 0, 0 for tmp := make(map[string]interface{}); it.Next(tmp); n++ { // if qutil.IntAll(tmp["repeat"]) == 1 || qutil.ObjToString(tmp["subtype"]) == "变更" { continuSum++ } else { cm := tmp["comeintime"] comeintime := qutil.Int64All(cm) if qutil.Float64All(now1-comeintime) < datelimit { info := NewInfo(tmp) k := fmt.Sprintf("%s_%s_%s", qutil.FormatDateWithObj(&cm, qutil.Date_yyyyMMdd), info.subtype, info.area) data := dm.data[k] if data == nil { data = []*Info{} //log.Println(k) } data = append(data, info) dm.data[k] = data } else { break } } if n%5000 == 0 { log.Println("current n:", n, continuSum) } tmp = make(map[string]interface{}) } log.Println("load data:", n) //启动定时任务 now := time.Now() t2 := time.Date(now.Year(), now.Month(), now.Day()+1, 0, 0, 0, 0, time.Local) go time.AfterFunc(time.Duration(int64(t2.Unix()-now.Unix()))*time.Second, func() { //go time.AfterFunc(time.Duration(10)*time.Second, func() { dm.update() }) return dm } func NewInfo(tmp map[string]interface{}) *Info { subtype := qutil.ObjToString(tmp["subtype"]) area := qutil.ObjToString(tmp["area"]) if area == "A" { area = "全国" } info := &Info{} info.id = qutil.BsonIdToSId(tmp["_id"]) info.title = qutil.ObjToString(tmp["title"]) info.area = area info.subtype = subtype info.buyer = qutil.ObjToString(tmp["buyer"]) info.projectname = qutil.ObjToString(tmp["projectname"]) info.ContainSpecialWord = FilterRegexp.MatchString(info.projectname) || FilterRegexp.MatchString(info.title) info.projectcode = qutil.ObjToString(tmp["projectcode"]) info.city = qutil.ObjToString(tmp["city"]) info.agency = qutil.ObjToString(tmp["agency"]) //info.winner = qutil.ObjToString(tmp["winner"]) info.publishtime = qutil.Int64All(tmp["publishtime"]) return info } func (d *datamap) check(info *Info) (b bool, id string) { d.lock.Lock() defer d.lock.Unlock() keys := []string{} for _, k := range d.keymap { keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area)) if info.area != "全国" { //这个后续可以不要 keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国")) } } L: for _, k := range keys { data := d.data[k] if len(data) > 1 { //对比 for _, v := range data { if math.Abs(qutil.Float64All(v.publishtime-info.publishtime)) > datelimit { continue } if v.agency != "" && info.agency != "" && v.agency != info.agency { continue } n := 0 if v.buyer != "" && v.buyer == info.buyer { n++ } if v.projectname != "" && v.projectname == info.projectname { n++ } if !info.ContainSpecialWord && n > 1 { b = true id = v.id break L } else if v.projectcode != "" && v.projectcode == info.projectcode { n++ } if !info.ContainSpecialWord && n > 1 || n > 2 { b = true id = v.id break L } //标题长度大于10且相等即为重复 // if len([]rune(info.title)) > 10 && v.title == info.title { // b = true // id = v.id // break L // } //标题长度大于10且包含关系+buyer/projectname/projectcode/city(全国/A的只判断包含关系即可)相等即为重复 if len([]rune(info.title)) > 10 && len([]rune(v.title)) > 10 && (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) { if info.area == "全国" || n > 0 || info.city == v.city { b = true id = v.id break L } } } } } if !b { k := fmt.Sprintf("%s_%s_%s", time.Now().Format(qutil.Date_yyyyMMdd), info.subtype, info.area) data := d.data[k] if data == nil { data = []*Info{info} } else { data = append(data, info) } d.data[k] = data } return } func (d *datamap) update() { //每天0点清除历史数据 d.lock.Lock() now, now1 := time.Now(), time.Now() t2 := time.Date(now1.Year(), now1.Month(), now1.Day()+1, 0, 0, 0, 0, time.Local) date := now.AddDate(0, 0, -d.days).Format(qutil.Date_yyyyMMdd) all, all1 := 0, 0 for k, v := range d.data { all += len(v) if strings.HasPrefix(k, date) { delete(d.data, k) } } for _, v := range d.data { all1 += len(v) } log.Println("更新前后数据:", all, all1) d.keymap = d.GetLatelyFiveDay() d.lock.Unlock() time.AfterFunc(time.Duration(int64(t2.Unix()-now1.Unix()))*time.Second, d.update) } func (d *datamap) GetLatelyFiveDay() []string { array := make([]string, d.days) now := time.Now() for i := 0; i < d.days; i++ { array[i] = now.Format(qutil.Date_yyyyMMdd) now = now.AddDate(0, 0, -1) } return array }