123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205 |
- package main
- import (
- "fmt"
- "log"
- "math"
- qutil "qfw/util"
- "strings"
- "sync"
- "time"
- )
// Info is the in-memory summary of one record, holding the fields that the
// duplicate check compares between records.
type Info struct {
	// Identity and classification values copied from the source record.
	id, title, area, city, subtype string

	buyer  string
	agency string // tendering agency
	winner string // winning bidder; NewInfo currently leaves this empty

	projectname, projectcode string

	// publishtime is compared against datelimit in check; presumably a Unix
	// timestamp in seconds (confirm against the data source).
	publishtime int64

	// ContainSpecialWord records whether FilterRegexp matched the project
	// name or the title when the Info was built.
	ContainSpecialWord bool
}
// datelimit is the comparison window, in seconds. It starts at five days and
// is overwritten by NewDatamap with days*86400.
var datelimit float64 = 5 * 86400
- type datamap struct {
- lock sync.Mutex //锁
- days int //保留几天数据
- data map[string][]*Info
- keymap []string
- }
- func NewDatamap(days int) *datamap {
- datelimit = qutil.Float64All(days * 86400)
- dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{}}
- dm.keymap = dm.GetLatelyFiveDay()
- //初始化加载数据
- sess := mgo.GetMgoConn()
- defer mgo.DestoryMongoConn(sess)
- it := sess.DB(mgo.DbName).C(extract).Find(nil).Sort("-_id").Iter()
- now1 := time.Now().Unix()
- n, continuSum := 0, 0
- for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
- //
- if qutil.IntAll(tmp["repeat"]) == 1 || qutil.ObjToString(tmp["subtype"]) == "变更" {
- continuSum++
- } else {
- cm := tmp["comeintime"]
- comeintime := qutil.Int64All(cm)
- if qutil.Float64All(now1-comeintime) < datelimit {
- info := NewInfo(tmp)
- k := fmt.Sprintf("%s_%s_%s", qutil.FormatDateWithObj(&cm, qutil.Date_yyyyMMdd), info.subtype, info.area)
- data := dm.data[k]
- if data == nil {
- data = []*Info{}
- //log.Println(k)
- }
- data = append(data, info)
- dm.data[k] = data
- } else {
- break
- }
- }
- if n%5000 == 0 {
- log.Println("current n:", n, continuSum)
- }
- tmp = make(map[string]interface{})
- }
- log.Println("load data:", n)
- //启动定时任务
- now := time.Now()
- t2 := time.Date(now.Year(), now.Month(), now.Day()+1, 0, 0, 0, 0, time.Local)
- go time.AfterFunc(time.Duration(int64(t2.Unix()-now.Unix()))*time.Second, func() {
- //go time.AfterFunc(time.Duration(10)*time.Second, func() {
- dm.update()
- })
- return dm
- }
- func NewInfo(tmp map[string]interface{}) *Info {
- subtype := qutil.ObjToString(tmp["subtype"])
- area := qutil.ObjToString(tmp["area"])
- if area == "A" {
- area = "全国"
- }
- info := &Info{}
- info.id = qutil.BsonIdToSId(tmp["_id"])
- info.title = qutil.ObjToString(tmp["title"])
- info.area = area
- info.subtype = subtype
- info.buyer = qutil.ObjToString(tmp["buyer"])
- info.projectname = qutil.ObjToString(tmp["projectname"])
- info.ContainSpecialWord = FilterRegexp.MatchString(info.projectname) || FilterRegexp.MatchString(info.title)
- info.projectcode = qutil.ObjToString(tmp["projectcode"])
- info.city = qutil.ObjToString(tmp["city"])
- info.agency = qutil.ObjToString(tmp["agency"])
- //info.winner = qutil.ObjToString(tmp["winner"])
- info.publishtime = qutil.Int64All(tmp["publishtime"])
- return info
- }
- func (d *datamap) check(info *Info) (b bool, id string) {
- d.lock.Lock()
- defer d.lock.Unlock()
- keys := []string{}
- for _, k := range d.keymap {
- keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area))
- if info.area != "全国" { //这个后续可以不要
- keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
- }
- }
- L:
- for _, k := range keys {
- data := d.data[k]
- if len(data) > 1 { //对比
- for _, v := range data {
- if math.Abs(qutil.Float64All(v.publishtime-info.publishtime)) > datelimit {
- continue
- }
- if v.agency != "" && info.agency != "" && v.agency != info.agency {
- continue
- }
- n := 0
- if v.buyer != "" && v.buyer == info.buyer {
- n++
- }
- if v.projectname != "" && v.projectname == info.projectname {
- n++
- }
- if !info.ContainSpecialWord && n > 1 {
- b = true
- id = v.id
- break L
- } else if v.projectcode != "" && v.projectcode == info.projectcode {
- n++
- }
- if !info.ContainSpecialWord && n > 1 || n > 2 {
- b = true
- id = v.id
- break L
- }
- //标题长度大于10且相等即为重复
- // if len([]rune(info.title)) > 10 && v.title == info.title {
- // b = true
- // id = v.id
- // break L
- // }
- //标题长度大于10且包含关系+buyer/projectname/projectcode/city(全国/A的只判断包含关系即可)相等即为重复
- if len([]rune(info.title)) > 10 && len([]rune(v.title)) > 10 && (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
- if info.area == "全国" || n > 0 || info.city == v.city {
- b = true
- id = v.id
- break L
- }
- }
- }
- }
- }
- if !b {
- k := fmt.Sprintf("%s_%s_%s", time.Now().Format(qutil.Date_yyyyMMdd), info.subtype, info.area)
- data := d.data[k]
- if data == nil {
- data = []*Info{info}
- } else {
- data = append(data, info)
- }
- d.data[k] = data
- }
- return
- }
- func (d *datamap) update() {
- //每天0点清除历史数据
- d.lock.Lock()
- now, now1 := time.Now(), time.Now()
- t2 := time.Date(now1.Year(), now1.Month(), now1.Day()+1, 0, 0, 0, 0, time.Local)
- date := now.AddDate(0, 0, -d.days).Format(qutil.Date_yyyyMMdd)
- all, all1 := 0, 0
- for k, v := range d.data {
- all += len(v)
- if strings.HasPrefix(k, date) {
- delete(d.data, k)
- }
- }
- for _, v := range d.data {
- all1 += len(v)
- }
- log.Println("更新前后数据:", all, all1)
- d.keymap = d.GetLatelyFiveDay()
- d.lock.Unlock()
- time.AfterFunc(time.Duration(int64(t2.Unix()-now1.Unix()))*time.Second, d.update)
- }
- func (d *datamap) GetLatelyFiveDay() []string {
- array := make([]string, d.days)
- now := time.Now()
- for i := 0; i < d.days; i++ {
- array[i] = now.Format(qutil.Date_yyyyMMdd)
- now = now.AddDate(0, 0, -1)
- }
- return array
- }
|