// datamap.go.bak (5.5 KB)
  1. package main
  2. import (
  3. "fmt"
  4. "log"
  5. "math"
  6. qutil "qfw/util"
  7. "strings"
  8. "sync"
  9. "time"
  10. )
  // Info is the subset of an announcement record that the de-duplication
  // logic works with. NewInfo fills it from a raw document map and
  // (*datamap).check compares two Info values field by field.
  type Info struct {
  	id, title                string
  	area, city               string
  	subtype, buyer           string
  	agency                   string // agency handling the tender
  	winner                   string // winning bidder
  	projectname, projectcode string
  	publishtime              int64 // compared in seconds against datelimit
  	ContainSpecialWord       bool  // true when FilterRegexp matches projectname or title
  }
  // datelimit is the maximum distance, in seconds, between two timestamps for
  // their records to be compared. It starts at five days and is overwritten by
  // NewDatamap with days*86400.
  var datelimit float64 = 5 * 24 * 60 * 60
  26. type datamap struct {
  27. lock sync.Mutex //锁
  28. days int //保留几天数据
  29. data map[string][]*Info
  30. keymap []string
  31. }
  32. func NewDatamap(days int) *datamap {
  33. datelimit = qutil.Float64All(days * 86400)
  34. dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{}}
  35. dm.keymap = dm.GetLatelyFiveDay()
  36. //初始化加载数据
  37. sess := mgo.GetMgoConn()
  38. defer mgo.DestoryMongoConn(sess)
  39. it := sess.DB(mgo.DbName).C(extract).Find(nil).Sort("-_id").Iter()
  40. now1 := time.Now().Unix()
  41. n, continuSum := 0, 0
  42. for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
  43. //
  44. if qutil.IntAll(tmp["repeat"]) == 1 || qutil.ObjToString(tmp["subtype"]) == "变更" {
  45. continuSum++
  46. } else {
  47. cm := tmp["comeintime"]
  48. comeintime := qutil.Int64All(cm)
  49. if qutil.Float64All(now1-comeintime) < datelimit {
  50. info := NewInfo(tmp)
  51. k := fmt.Sprintf("%s_%s_%s", qutil.FormatDateWithObj(&cm, qutil.Date_yyyyMMdd), info.subtype, info.area)
  52. data := dm.data[k]
  53. if data == nil {
  54. data = []*Info{}
  55. //log.Println(k)
  56. }
  57. data = append(data, info)
  58. dm.data[k] = data
  59. } else {
  60. break
  61. }
  62. }
  63. if n%5000 == 0 {
  64. log.Println("current n:", n, continuSum)
  65. }
  66. tmp = make(map[string]interface{})
  67. }
  68. log.Println("load data:", n)
  69. //启动定时任务
  70. now := time.Now()
  71. t2 := time.Date(now.Year(), now.Month(), now.Day()+1, 0, 0, 0, 0, time.Local)
  72. go time.AfterFunc(time.Duration(int64(t2.Unix()-now.Unix()))*time.Second, func() {
  73. //go time.AfterFunc(time.Duration(10)*time.Second, func() {
  74. dm.update()
  75. })
  76. return dm
  77. }
  78. func NewInfo(tmp map[string]interface{}) *Info {
  79. subtype := qutil.ObjToString(tmp["subtype"])
  80. area := qutil.ObjToString(tmp["area"])
  81. if area == "A" {
  82. area = "全国"
  83. }
  84. info := &Info{}
  85. info.id = qutil.BsonIdToSId(tmp["_id"])
  86. info.title = qutil.ObjToString(tmp["title"])
  87. info.area = area
  88. info.subtype = subtype
  89. info.buyer = qutil.ObjToString(tmp["buyer"])
  90. info.projectname = qutil.ObjToString(tmp["projectname"])
  91. info.ContainSpecialWord = FilterRegexp.MatchString(info.projectname) || FilterRegexp.MatchString(info.title)
  92. info.projectcode = qutil.ObjToString(tmp["projectcode"])
  93. info.city = qutil.ObjToString(tmp["city"])
  94. info.agency = qutil.ObjToString(tmp["agency"])
  95. //info.winner = qutil.ObjToString(tmp["winner"])
  96. info.publishtime = qutil.Int64All(tmp["publishtime"])
  97. return info
  98. }
  99. func (d *datamap) check(info *Info) (b bool, id string) {
  100. d.lock.Lock()
  101. defer d.lock.Unlock()
  102. keys := []string{}
  103. for _, k := range d.keymap {
  104. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area))
  105. if info.area != "全国" { //这个后续可以不要
  106. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
  107. }
  108. }
  109. L:
  110. for _, k := range keys {
  111. data := d.data[k]
  112. if len(data) > 1 { //对比
  113. for _, v := range data {
  114. if math.Abs(qutil.Float64All(v.publishtime-info.publishtime)) > datelimit {
  115. continue
  116. }
  117. if v.agency != "" && info.agency != "" && v.agency != info.agency {
  118. continue
  119. }
  120. n := 0
  121. if v.buyer != "" && v.buyer == info.buyer {
  122. n++
  123. }
  124. if v.projectname != "" && v.projectname == info.projectname {
  125. n++
  126. }
  127. if !info.ContainSpecialWord && n > 1 {
  128. b = true
  129. id = v.id
  130. break L
  131. } else if v.projectcode != "" && v.projectcode == info.projectcode {
  132. n++
  133. }
  134. if !info.ContainSpecialWord && n > 1 || n > 2 {
  135. b = true
  136. id = v.id
  137. break L
  138. }
  139. //标题长度大于10且相等即为重复
  140. // if len([]rune(info.title)) > 10 && v.title == info.title {
  141. // b = true
  142. // id = v.id
  143. // break L
  144. // }
  145. //标题长度大于10且包含关系+buyer/projectname/projectcode/city(全国/A的只判断包含关系即可)相等即为重复
  146. if len([]rune(info.title)) > 10 && len([]rune(v.title)) > 10 && (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  147. if info.area == "全国" || n > 0 || info.city == v.city {
  148. b = true
  149. id = v.id
  150. break L
  151. }
  152. }
  153. }
  154. }
  155. }
  156. if !b {
  157. k := fmt.Sprintf("%s_%s_%s", time.Now().Format(qutil.Date_yyyyMMdd), info.subtype, info.area)
  158. data := d.data[k]
  159. if data == nil {
  160. data = []*Info{info}
  161. } else {
  162. data = append(data, info)
  163. }
  164. d.data[k] = data
  165. }
  166. return
  167. }
  168. func (d *datamap) update() {
  169. //每天0点清除历史数据
  170. d.lock.Lock()
  171. now, now1 := time.Now(), time.Now()
  172. t2 := time.Date(now1.Year(), now1.Month(), now1.Day()+1, 0, 0, 0, 0, time.Local)
  173. date := now.AddDate(0, 0, -d.days).Format(qutil.Date_yyyyMMdd)
  174. all, all1 := 0, 0
  175. for k, v := range d.data {
  176. all += len(v)
  177. if strings.HasPrefix(k, date) {
  178. delete(d.data, k)
  179. }
  180. }
  181. for _, v := range d.data {
  182. all1 += len(v)
  183. }
  184. log.Println("更新前后数据:", all, all1)
  185. d.keymap = d.GetLatelyFiveDay()
  186. d.lock.Unlock()
  187. time.AfterFunc(time.Duration(int64(t2.Unix()-now1.Unix()))*time.Second, d.update)
  188. }
  189. func (d *datamap) GetLatelyFiveDay() []string {
  190. array := make([]string, d.days)
  191. now := time.Now()
  192. for i := 0; i < d.days; i++ {
  193. array[i] = now.Format(qutil.Date_yyyyMMdd)
  194. now = now.AddDate(0, 0, -1)
  195. }
  196. return array
  197. }