datamap.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. package main
  2. import (
  3. "fmt"
  4. "log"
  5. "math"
  6. qutil "qfw/util"
  7. "qfw/util/mongodb"
  8. "strconv"
  9. "strings"
  10. "sync"
  11. "time"
  12. )
  13. type Info struct {
  14. id string
  15. title string
  16. area string
  17. city string
  18. subtype string
  19. buyer string
  20. agency string //代理机构
  21. winner string //中标单位
  22. projectname string
  23. projectcode string
  24. publishtime int64
  25. comeintime int64
  26. bidopentime int64 //开标时间
  27. agencyaddr string//开标地点
  28. detail string//招标内容
  29. site string//站点
  30. ContainSpecialWord bool
  31. }
  32. var datelimit = float64(432000)
  33. var mm int
// datamap is the mutex-guarded in-memory store of recent records that check
// compares new records against.
//
// NewDatamap initialises it with a positional literal, so the field order
// below must not change.
type datamap struct {
	lock   sync.Mutex         // guards the fields below; taken by check
	days   int                // number of days of data to retain
	data   map[string][]*Info // buckets keyed "<day>_<subtype>_<area>"
	keymap []string           // retained days, as last computed by update
	keys   map[string]bool    // set of day strings that currently have buckets
}
  41. func NewDatamap(days int, lastid string) *datamap {
  42. datelimit = qutil.Float64All(days * 86400)
  43. dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{}, map[string]bool{}}
  44. if lastid == "" {
  45. return dm
  46. }
  47. //初始化加载数据
  48. sess := mgo.GetMgoConn()
  49. defer mgo.DestoryMongoConn(sess)
  50. it := sess.DB(mgo.DbName).C(extract).Find(mongodb.ObjToMQ(`{"_id":{"$lte":"`+lastid+`"}}`, true)).Sort("-_id").Iter()
  51. now1 := int64(0)
  52. n, continuSum := 0, 0
  53. for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
  54. //|| qutil.ObjToString(tmp["subtype"]) == "变更" //变更的数据打开
  55. if qutil.IntAll(tmp["repeat"]) == 1 {
  56. continuSum++
  57. } else {
  58. cm := tmp["comeintime"] //时间单位
  59. //cm := tmp["publishtime"]
  60. comeintime := qutil.Int64All(cm)
  61. if comeintime == 0 {
  62. id := qutil.BsonIdToSId(tmp["_id"])[0:8]
  63. comeintime, _ = strconv.ParseInt(id, 16, 64)
  64. }
  65. if now1 == 0 {
  66. now1 = comeintime
  67. }
  68. if qutil.Float64All(now1-comeintime) < datelimit {
  69. info := NewInfo(tmp)
  70. dkey := qutil.FormatDateWithObj(&cm, qutil.Date_yyyyMMdd)
  71. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  72. data := dm.data[k]
  73. if data == nil {
  74. data = []*Info{}
  75. //log.Println(k)
  76. }
  77. data = append(data, info)
  78. dm.data[k] = data
  79. dm.keys[dkey] = true
  80. } else {
  81. break
  82. }
  83. }
  84. if n%5000 == 0 {
  85. log.Println("current n:", n, continuSum)
  86. }
  87. tmp = make(map[string]interface{})
  88. }
  89. log.Println("load data:", n)
  90. return dm
  91. }
  92. func NewInfo(tmp map[string]interface{}) *Info {
  93. subtype := qutil.ObjToString(tmp["subtype"])
  94. area := qutil.ObjToString(tmp["area"])
  95. if area == "A" {
  96. area = "全国"
  97. }
  98. info := &Info{}
  99. info.id = qutil.BsonIdToSId(tmp["_id"])
  100. info.title = qutil.ObjToString(tmp["title"])
  101. info.area = area
  102. info.subtype = subtype
  103. info.buyer = qutil.ObjToString(tmp["buyer"])
  104. info.projectname = qutil.ObjToString(tmp["projectname"])
  105. //info.ContainSpecialWord = FilterRegexp.MatchString(info.projectname) || FilterRegexp.MatchString(info.title)
  106. info.ContainSpecialWord = FilterRegTitle.MatchString(info.title)
  107. info.projectcode = qutil.ObjToString(tmp["projectcode"])
  108. info.city = qutil.ObjToString(tmp["city"])
  109. info.agency = qutil.ObjToString(tmp["agency"])
  110. //info.winner = qutil.ObjToString(tmp["winner"])
  111. info.publishtime = qutil.Int64All(tmp["publishtime"])
  112. info.bidopentime = qutil.Int64All(tmp["bidopentime"])
  113. info.agencyaddr = qutil.ObjToString(tmp["agencyaddr"])
  114. info.detail = qutil.ObjToString(tmp["detail"])
  115. info.site = qutil.ObjToString(tmp["site"])
  116. return info
  117. }
  118. func (d *datamap) check(info *Info) (b bool, id string) {
  119. d.lock.Lock()
  120. defer d.lock.Unlock()
  121. keys := []string{}
  122. for k, _ := range d.keys {
  123. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area))
  124. if info.area != "全国" { //这个后续可以不要
  125. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
  126. }
  127. }
  128. L:
  129. for _, k := range keys {
  130. data := d.data[k]
  131. if len(data) > 0 { //对比
  132. for _, v := range data {
  133. //正常重复
  134. if v.id == info.id {
  135. return false, v.id
  136. }
  137. if math.Abs(qutil.Float64All(v.publishtime-info.publishtime)) > datelimit {
  138. continue
  139. }
  140. if v.agency != "" && info.agency != "" && v.agency != info.agency {
  141. continue
  142. }
  143. if info.subtype==v.subtype {
  144. if info.subtype == "变更" {
  145. //以下为新增方法 , 变更数据判重处理 v为原数据 info为目标数据
  146. if info.publishtime<v.publishtime{
  147. continue
  148. }
  149. if info.ContainSpecialWord&&info.title!=v.title&&v.title!="" {
  150. continue
  151. }
  152. if v.projectcode != info.projectcode&&len([]rune(info.projectcode)) >=10&&v.projectcode!=""{
  153. continue
  154. }
  155. //同城判定有效
  156. first_judge:= false
  157. if (v.projectcode != ""&&v.projectcode==info.projectcode&&v.projectname != ""&&v.projectname==info.projectname)||
  158. (v.title != ""&&v.title==info.title&&v.bidopentime != 0&&v.bidopentime==info.bidopentime&&v.detail != ""&&v.detail==info.detail) {
  159. first_judge = true
  160. }
  161. //3/6等判断
  162. n := 0
  163. if v.title != "" && v.title == info.title {
  164. n++
  165. }
  166. if v.projectname != "" && v.projectname == info.projectname {
  167. n++
  168. }
  169. if v.projectcode != "" && v.projectcode == info.projectcode {
  170. n++
  171. }
  172. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  173. n++
  174. }
  175. if v.agencyaddr != "" && v.agencyaddr == info.agencyaddr {
  176. n++
  177. }
  178. if v.detail != "" && v.detail == info.detail {
  179. n++
  180. }
  181. t:= judgeCityType(v.area,info.area,v.city,info.city)
  182. if n>=3||first_judge==true {
  183. if t==2 {
  184. //同城
  185. b = true
  186. id = v.id
  187. log.Print("同城满足的",info.id)
  188. break L
  189. }
  190. }
  191. }else {//非变更数据判重处理
  192. n:=0 //三要素
  193. m:=0 //二要素
  194. x:=0 //四要素
  195. if info.buyer != "" &&v.buyer == info.buyer {
  196. n++
  197. x++
  198. }
  199. if info.projectname != ""&&v.projectname == info.projectname {
  200. n++
  201. m++
  202. x++
  203. }
  204. if info.projectcode != ""&&v.projectcode == info.projectcode {
  205. n++
  206. m++
  207. x++
  208. }
  209. if info.title != ""&&v.title == info.title {
  210. x++
  211. }
  212. t:= judgeCityType(v.area,info.area,v.city,info.city)
  213. c_1 :=conditionTitle(v.title,info.title) //标题满足
  214. c_2 :=conditionNum(v.projectcode,info.projectcode) //编号满足
  215. c_3 :=conditionTAB(v.title,info.title,v.buyer,info.buyer) //标题+采购单位
  216. //同站点判断
  217. if info.site != "" && v.site == info.site {
  218. if n>1||c_1||c_2 {
  219. b = true
  220. id = v.id
  221. log.Println("站点满足过滤")
  222. break L
  223. }
  224. }else {
  225. if info.ContainSpecialWord&&info.title!=v.title&&v.title!="" {
  226. continue
  227. }
  228. if v.projectcode != info.projectcode&&len([]rune(info.projectcode)) >=10&&v.projectcode!=""{
  229. continue
  230. }
  231. //先决条件满足三要素
  232. if n==3{
  233. b = true
  234. id = v.id
  235. break L
  236. }
  237. //城市判断
  238. if t==0||t==1 { //最少一个全国
  239. if c_1 && (c_2||n>1) {
  240. b = true
  241. id = v.id
  242. break L
  243. }
  244. if c_2&&x>2{
  245. b = true
  246. id = v.id
  247. break L
  248. }
  249. }else if t==2 { // 省-市
  250. if c_1||c_2||n>1 {
  251. b = true
  252. id = v.id
  253. break L
  254. }
  255. }else if t==3 {// !省 !市
  256. if (c_1&&n>1)||(c_2&&x>2){
  257. b = true
  258. id = v.id
  259. break L
  260. }
  261. }else if t==4 {// 省 !市
  262. if m>1||(c_1&&m>0)||(c_2&&x>1)||(c_3&&n>1){
  263. b = true
  264. id = v.id
  265. break L
  266. }
  267. }else {
  268. }
  269. }
  270. }
  271. }
  272. ////非变更数据判重处理
  273. //n := 0
  274. //if v.buyer != "" && v.buyer == info.buyer {
  275. // n++
  276. //}
  277. //if v.projectname != "" && v.projectname == info.projectname {
  278. // n++
  279. //}
  280. //if !info.ContainSpecialWord && n > 1 {
  281. // b = true
  282. // id = v.id
  283. // break L
  284. //} else if v.projectcode != "" && v.projectcode == info.projectcode {
  285. // n++
  286. //}
  287. //if !info.ContainSpecialWord && n > 1 || n > 2 {
  288. // b = true
  289. // id = v.id
  290. // break L
  291. //}
  292. ////标题长度大于10且相等即为重复
  293. //// if len([]rune(info.title)) > 10 && v.title == info.title {
  294. //// b = true
  295. //// id = v.id
  296. //// break L
  297. //// }
  298. ////标题长度大于10且包含关系+buyer/projectname/projectcode/city(全国/A的只判断包含关系即可)相等即为重复
  299. //if len([]rune(info.title)) > 10 && len([]rune(v.title)) > 10 && (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  300. // if info.area == "全国" || n > 0 || info.city == v.city {
  301. // b = true
  302. // id = v.id
  303. // break L
  304. // }
  305. //}
  306. }
  307. }
  308. }
  309. //往预存数据 d 添加
  310. if !b {
  311. ct, _ := strconv.ParseInt(info.id[:8], 16, 64)
  312. dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
  313. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  314. data := d.data[k]
  315. if data == nil {
  316. data = []*Info{info}
  317. d.data[k] = data
  318. if !d.keys[dkey] {
  319. d.keys[dkey] = true
  320. d.update(ct)
  321. }
  322. } else {
  323. data = append(data, info)
  324. d.data[k] = data
  325. }
  326. }
  327. return
  328. }
  329. //判断是否同城等情况
  330. func judgeCityType(v string, info string,v_c string,info_c string) (t int) {
  331. t=0
  332. if (v=="全国"||v=="")&&(info=="全国"||info=="") {//均为全国
  333. t=0
  334. }else if v!="全国"&&info!="全国"&&v!=""&&info!=""&&
  335. v_c!="全国"&&info_c!="全国"&&v_c!=""&&info_c!=""{//均非全国
  336. if v==info &&v_c==info_c { //同省同城
  337. t=2
  338. }else if v!=info&&v_c!=info_c{//非同省非同城
  339. t=3
  340. }else {//同省非同城
  341. t=4
  342. }
  343. }else {//有且一个全国
  344. t=1
  345. }
  346. return t
  347. }
  348. //条件一 标题
  349. func conditionTitle(t1 string, t2 string) bool {
  350. if len([]rune(t1))>10 && len([]rune(t2))>10&&
  351. (strings.Contains(t1, t2)||strings.Contains(t2, t1)) {
  352. return true
  353. }
  354. return false
  355. }
  356. //条件二 项目编号
  357. func conditionNum(c1 string ,c2 string) bool {
  358. if c1 == c2&&len([]rune(c1)) >=10 {
  359. return true
  360. }
  361. return false
  362. }
  363. //条件三 采购单位+标题
  364. func conditionTAB(t1 string ,t2 string,b1 string,b2 string) bool {
  365. if t1==t2&&b1==b2 {
  366. return true
  367. }
  368. return false
  369. }
  370. func (d *datamap) update(t int64) {
  371. //每天0点清除历史数据
  372. d.keymap = d.GetLatelyFiveDay(t)
  373. m := map[string]bool{}
  374. for _, v := range d.keymap {
  375. m[v] = true
  376. }
  377. all, all1 := 0, 0
  378. for k, v := range d.data {
  379. all += len(v)
  380. if !m[k[:8]] {
  381. delete(d.data, k)
  382. }
  383. }
  384. for k, _ := range d.keys {
  385. if !m[k] {
  386. delete(d.keys, k)
  387. }
  388. }
  389. for _, v := range d.data {
  390. all1 += len(v)
  391. }
  392. //log.Println("更新前后数据:", all, all1)
  393. }
  394. func (d *datamap) GetLatelyFiveDay(t int64) []string {
  395. array := make([]string, d.days)
  396. now := time.Unix(t, 0)
  397. for i := 0; i < d.days; i++ {
  398. array[i] = now.Format(qutil.Date_yyyyMMdd)
  399. now = now.AddDate(0, 0, -1)
  400. }
  401. return array
  402. }