|
@@ -1,205 +0,0 @@
|
|
|
-package main
|
|
|
-
|
|
|
-import (
|
|
|
- "fmt"
|
|
|
- "log"
|
|
|
- "math"
|
|
|
- qutil "qfw/util"
|
|
|
- "strings"
|
|
|
- "sync"
|
|
|
- "time"
|
|
|
-)
|
|
|
-
|
|
|
// Info holds the fields of one announcement record that duplicate detection
// compares.
type Info struct {
	id                 string // string form of the record's _id
	title              string
	area               string // NewInfo stores "全国" when the raw value is "A"
	city               string
	subtype            string
	buyer              string
	agency             string // agency
	winner             string // winning bidder; NewInfo leaves it empty
	projectname        string
	projectcode        string
	publishtime        int64 // publish time; presumably Unix seconds (compared against datelimit) — confirm
	ContainSpecialWord bool  // true when FilterRegexp matches projectname or title
}
|
|
|
-
|
|
|
// datelimit is the time window, in seconds, used both when loading records
// and when comparing publish times. The initial value is 432000 (5 * 86400);
// NewDatamap overwrites it with days * 86400.
var datelimit float64 = 432000
|
|
|
-
|
|
|
-type datamap struct {
|
|
|
- lock sync.Mutex //锁
|
|
|
- days int //保留几天数据
|
|
|
- data map[string][]*Info
|
|
|
- keymap []string
|
|
|
-}
|
|
|
-
|
|
|
-func NewDatamap(days int) *datamap {
|
|
|
- datelimit = qutil.Float64All(days * 86400)
|
|
|
- dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{}}
|
|
|
- dm.keymap = dm.GetLatelyFiveDay()
|
|
|
- //初始化加载数据
|
|
|
- sess := mgo.GetMgoConn()
|
|
|
- defer mgo.DestoryMongoConn(sess)
|
|
|
- it := sess.DB(mgo.DbName).C(extract).Find(nil).Sort("-_id").Iter()
|
|
|
- now1 := time.Now().Unix()
|
|
|
- n, continuSum := 0, 0
|
|
|
- for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
|
|
|
- //
|
|
|
- if qutil.IntAll(tmp["repeat"]) == 1 || qutil.ObjToString(tmp["subtype"]) == "变更" {
|
|
|
- continuSum++
|
|
|
- } else {
|
|
|
- cm := tmp["comeintime"]
|
|
|
- comeintime := qutil.Int64All(cm)
|
|
|
- if qutil.Float64All(now1-comeintime) < datelimit {
|
|
|
- info := NewInfo(tmp)
|
|
|
- k := fmt.Sprintf("%s_%s_%s", qutil.FormatDateWithObj(&cm, qutil.Date_yyyyMMdd), info.subtype, info.area)
|
|
|
- data := dm.data[k]
|
|
|
- if data == nil {
|
|
|
- data = []*Info{}
|
|
|
- //log.Println(k)
|
|
|
- }
|
|
|
- data = append(data, info)
|
|
|
- dm.data[k] = data
|
|
|
- } else {
|
|
|
- break
|
|
|
- }
|
|
|
- }
|
|
|
- if n%5000 == 0 {
|
|
|
- log.Println("current n:", n, continuSum)
|
|
|
- }
|
|
|
- tmp = make(map[string]interface{})
|
|
|
- }
|
|
|
- log.Println("load data:", n)
|
|
|
- //启动定时任务
|
|
|
- now := time.Now()
|
|
|
- t2 := time.Date(now.Year(), now.Month(), now.Day()+1, 0, 0, 0, 0, time.Local)
|
|
|
- go time.AfterFunc(time.Duration(int64(t2.Unix()-now.Unix()))*time.Second, func() {
|
|
|
- //go time.AfterFunc(time.Duration(10)*time.Second, func() {
|
|
|
- dm.update()
|
|
|
- })
|
|
|
- return dm
|
|
|
-}
|
|
|
-func NewInfo(tmp map[string]interface{}) *Info {
|
|
|
- subtype := qutil.ObjToString(tmp["subtype"])
|
|
|
- area := qutil.ObjToString(tmp["area"])
|
|
|
- if area == "A" {
|
|
|
- area = "全国"
|
|
|
- }
|
|
|
- info := &Info{}
|
|
|
- info.id = qutil.BsonIdToSId(tmp["_id"])
|
|
|
- info.title = qutil.ObjToString(tmp["title"])
|
|
|
- info.area = area
|
|
|
- info.subtype = subtype
|
|
|
- info.buyer = qutil.ObjToString(tmp["buyer"])
|
|
|
- info.projectname = qutil.ObjToString(tmp["projectname"])
|
|
|
- info.ContainSpecialWord = FilterRegexp.MatchString(info.projectname) || FilterRegexp.MatchString(info.title)
|
|
|
- info.projectcode = qutil.ObjToString(tmp["projectcode"])
|
|
|
- info.city = qutil.ObjToString(tmp["city"])
|
|
|
- info.agency = qutil.ObjToString(tmp["agency"])
|
|
|
- //info.winner = qutil.ObjToString(tmp["winner"])
|
|
|
- info.publishtime = qutil.Int64All(tmp["publishtime"])
|
|
|
- return info
|
|
|
-}
|
|
|
-
|
|
|
-func (d *datamap) check(info *Info) (b bool, id string) {
|
|
|
- d.lock.Lock()
|
|
|
- defer d.lock.Unlock()
|
|
|
- keys := []string{}
|
|
|
- for _, k := range d.keymap {
|
|
|
- keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area))
|
|
|
- if info.area != "全国" { //这个后续可以不要
|
|
|
- keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
|
|
|
- }
|
|
|
- }
|
|
|
-L:
|
|
|
- for _, k := range keys {
|
|
|
- data := d.data[k]
|
|
|
- if len(data) > 1 { //对比
|
|
|
- for _, v := range data {
|
|
|
- if math.Abs(qutil.Float64All(v.publishtime-info.publishtime)) > datelimit {
|
|
|
- continue
|
|
|
- }
|
|
|
- if v.agency != "" && info.agency != "" && v.agency != info.agency {
|
|
|
- continue
|
|
|
- }
|
|
|
- n := 0
|
|
|
- if v.buyer != "" && v.buyer == info.buyer {
|
|
|
- n++
|
|
|
- }
|
|
|
- if v.projectname != "" && v.projectname == info.projectname {
|
|
|
- n++
|
|
|
- }
|
|
|
- if !info.ContainSpecialWord && n > 1 {
|
|
|
- b = true
|
|
|
- id = v.id
|
|
|
- break L
|
|
|
- } else if v.projectcode != "" && v.projectcode == info.projectcode {
|
|
|
- n++
|
|
|
- }
|
|
|
- if !info.ContainSpecialWord && n > 1 || n > 2 {
|
|
|
- b = true
|
|
|
- id = v.id
|
|
|
- break L
|
|
|
- }
|
|
|
- //标题长度大于10且相等即为重复
|
|
|
- // if len([]rune(info.title)) > 10 && v.title == info.title {
|
|
|
- // b = true
|
|
|
- // id = v.id
|
|
|
- // break L
|
|
|
- // }
|
|
|
- //标题长度大于10且包含关系+buyer/projectname/projectcode/city(全国/A的只判断包含关系即可)相等即为重复
|
|
|
- if len([]rune(info.title)) > 10 && len([]rune(v.title)) > 10 && (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
|
|
|
- if info.area == "全国" || n > 0 || info.city == v.city {
|
|
|
- b = true
|
|
|
- id = v.id
|
|
|
- break L
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- if !b {
|
|
|
- k := fmt.Sprintf("%s_%s_%s", time.Now().Format(qutil.Date_yyyyMMdd), info.subtype, info.area)
|
|
|
- data := d.data[k]
|
|
|
- if data == nil {
|
|
|
- data = []*Info{info}
|
|
|
- } else {
|
|
|
- data = append(data, info)
|
|
|
- }
|
|
|
- d.data[k] = data
|
|
|
- }
|
|
|
- return
|
|
|
-}
|
|
|
-
|
|
|
-func (d *datamap) update() {
|
|
|
- //每天0点清除历史数据
|
|
|
- d.lock.Lock()
|
|
|
- now, now1 := time.Now(), time.Now()
|
|
|
- t2 := time.Date(now1.Year(), now1.Month(), now1.Day()+1, 0, 0, 0, 0, time.Local)
|
|
|
- date := now.AddDate(0, 0, -d.days).Format(qutil.Date_yyyyMMdd)
|
|
|
- all, all1 := 0, 0
|
|
|
- for k, v := range d.data {
|
|
|
- all += len(v)
|
|
|
- if strings.HasPrefix(k, date) {
|
|
|
- delete(d.data, k)
|
|
|
- }
|
|
|
- }
|
|
|
- for _, v := range d.data {
|
|
|
- all1 += len(v)
|
|
|
- }
|
|
|
- log.Println("更新前后数据:", all, all1)
|
|
|
- d.keymap = d.GetLatelyFiveDay()
|
|
|
- d.lock.Unlock()
|
|
|
- time.AfterFunc(time.Duration(int64(t2.Unix()-now1.Unix()))*time.Second, d.update)
|
|
|
-}
|
|
|
-
|
|
|
-func (d *datamap) GetLatelyFiveDay() []string {
|
|
|
- array := make([]string, d.days)
|
|
|
- now := time.Now()
|
|
|
- for i := 0; i < d.days; i++ {
|
|
|
- array[i] = now.Format(qutil.Date_yyyyMMdd)
|
|
|
- now = now.AddDate(0, 0, -1)
|
|
|
- }
|
|
|
- return array
|
|
|
-}
|