datamap.go 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973
  1. package main
  2. import (
  3. "fmt"
  4. "log"
  5. qutil "qfw/util"
  6. "qfw/util/mongodb"
  7. "strconv"
  8. "strings"
  9. "sync"
  10. "time"
  11. )
// Info is the in-memory summary of one record that the duplicate checks in
// this file compare against each other. NewInfo fills it from a raw document
// map.
type Info struct {
	id               string                 // record id as a string (NewInfo: qutil.BsonIdToSId of "_id")
	title            string                 // title
	area             string                 // province; NewInfo maps the raw value "A" to "全国" (nationwide)
	city             string                 // city
	subtype          string                 // notice type
	buyer            string                 // purchasing entity
	agency           string                 // agency
	winner           string                 // winning bidder
	budget           float64                // budget amount
	bidamount        float64                // winning bid amount
	projectname      string                 // project name
	projectcode      string                 // project code
	publishtime      int64                  // publish time
	comeintime       int64                  // collection time; NOTE(review): not populated by NewInfo in this file
	bidopentime      int64                  // bid opening time
	agencyaddr       string                 // bid opening location
	site             string                 // source site
	href             string                 // URL of the body page
	repeatid         string                 // duplicate id; presumably the id of the record this one repeats — confirm
	titleSpecialWord bool                   // title matches FilterRegTitle_1 or FilterRegTitle_2
	specialWord      bool                   // title matches FilterRegTitle (the second-pass special words)
	mergemap         map[string]interface{} // merge record (raw "merge_map" value)
}
  36. var datelimit = float64(432000) //五天
  37. var sitelock sync.Mutex //锁
// datamap is the duplicate-check data pool.
//
// NOTE: NewDatamap builds this struct with a positional composite literal, so
// the field order must not change.
type datamap struct {
	lock   sync.Mutex         // mutex guarding data and keys
	days   int                // how many days of data to keep
	data   map[string][]*Info // records bucketed by "<yyyyMMdd>_<subtype>_<area>"
	keymap []string           // the retained day strings, as last computed by update
	keys   map[string]bool    // set of day strings (yyyyMMdd) that currently have buckets
}
// historymap is the data pool used when re-checking historical updates.
//
// NOTE: NewHistorymap builds this struct with a positional composite literal,
// so the field order must not change.
type historymap struct {
	lock   sync.Mutex         // mutex held by checkHistory for its whole run
	days   int                // how many days of data to keep (NewHistorymap sets 5)
	data   map[string][]*Info // records bucketed by "<yyyyMMdd>_<subtype>_<area>"
	keymap []string           // NOTE(review): never written in this file after construction
	keys   map[string]bool    // set of day strings (yyyyMMdd) that currently have buckets
}
  54. func NewDatamap(days int, lastid string) *datamap {
  55. datelimit = qutil.Float64All(days * 86400)
  56. dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{}, map[string]bool{}}
  57. if lastid == "" {
  58. return dm
  59. }
  60. //初始化加载数据
  61. sess := mgo.GetMgoConn()
  62. defer mgo.DestoryMongoConn(sess)
  63. it := sess.DB(mgo.DbName).C(extract).Find(mongodb.ObjToMQ(`{"_id":{"$lte":"`+lastid+`"}}`, true)).Sort("-_id").Iter()
  64. now1 := int64(0)
  65. n, continuSum := 0, 0
  66. for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
  67. if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1 {
  68. continuSum++
  69. } else {
  70. cm := tmp["comeintime"] //时间单位?
  71. comeintime := qutil.Int64All(cm)
  72. if comeintime == 0 {
  73. id := qutil.BsonIdToSId(tmp["_id"])[0:8]
  74. comeintime, _ = strconv.ParseInt(id, 16, 64)
  75. }
  76. if now1 == 0 {
  77. now1 = comeintime
  78. }
  79. if qutil.Float64All(now1-comeintime) < datelimit {
  80. info := NewInfo(tmp)
  81. //时间字符串
  82. dkey := qutil.FormatDateWithObj(&cm, qutil.Date_yyyyMMdd)
  83. //拼接的一个时间字符串 xxxx_类型_省份
  84. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  85. data := dm.data[k]
  86. if data == nil {
  87. data = []*Info{}
  88. }
  89. data = append(data, info)
  90. dm.data[k] = data
  91. dm.keys[dkey] = true
  92. } else {
  93. break
  94. }
  95. }
  96. if n%5000 == 0 {
  97. log.Println("current n:", n, continuSum)
  98. }
  99. tmp = make(map[string]interface{})
  100. }
  101. log.Println("load data:", n)
  102. return dm
  103. }
  104. //构建新历史数据池
  105. func NewHistorymap(startid string, lastid string, startTime int64, lastTime int64) *historymap {
  106. datelimit = qutil.Float64All(5 * 86400)
  107. hm := &historymap{sync.Mutex{}, 5, map[string][]*Info{}, []string{}, map[string]bool{}}
  108. if lastid == "" || startid == "" {
  109. return hm
  110. }
  111. //取startid之前5天
  112. sess_start := mgo.GetMgoConn()
  113. defer mgo.DestoryMongoConn(sess_start) //lte gte
  114. it_start := sess_start.DB(mgo.DbName).C(extract).Find(mongodb.ObjToMQ(`{"_id":{"$lte":"`+startid+`"}}`,
  115. true)).Sort("-_id").Iter()
  116. m, n := 0, 0
  117. for tmp_start := make(map[string]interface{}); it_start.Next(&tmp_start); {
  118. cm := tmp_start["comeintime"]
  119. comeintime := qutil.Int64All(tmp_start["comeintime"])
  120. if comeintime == 0 {
  121. id := qutil.BsonIdToSId(tmp_start["_id"])[0:8]
  122. comeintime, _ = strconv.ParseInt(id, 16, 64)
  123. }
  124. if qutil.Float64All(startTime-comeintime) <= datelimit {
  125. n++
  126. info := NewInfo(tmp_start)
  127. dkey := qutil.FormatDateWithObj(&cm, qutil.Date_yyyyMMdd)
  128. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  129. data := hm.data[k]
  130. if data == nil {
  131. data = []*Info{}
  132. }
  133. data = append(data, info)
  134. hm.data[k] = data
  135. hm.keys[dkey] = true
  136. } else {
  137. break
  138. }
  139. tmp_start = make(map[string]interface{})
  140. }
  141. log.Println("load history 前:", n)
  142. //取lastid之后5天
  143. sess_last := mgo.GetMgoConn()
  144. defer mgo.DestoryMongoConn(sess_last) //lte gte
  145. it_last := sess_last.DB(mgo.DbName).C(extract).Find(mongodb.ObjToMQ(`{"_id":{"$gte":"`+lastid+`"}}`,
  146. true)).Sort("_id").Iter()
  147. for tmp_last := make(map[string]interface{}); it_last.Next(&tmp_last); {
  148. cm := tmp_last["comeintime"]
  149. comeintime := qutil.Int64All(tmp_last["comeintime"])
  150. if comeintime == 0 {
  151. id := qutil.BsonIdToSId(tmp_last["_id"])[0:8]
  152. comeintime, _ = strconv.ParseInt(id, 16, 64)
  153. }
  154. if qutil.Float64All(comeintime-lastTime) <= datelimit {
  155. m++
  156. info := NewInfo(tmp_last)
  157. dkey := qutil.FormatDateWithObj(&cm, qutil.Date_yyyyMMdd)
  158. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  159. data := hm.data[k]
  160. if data == nil {
  161. data = []*Info{}
  162. }
  163. data = append(data, info)
  164. hm.data[k] = data
  165. hm.keys[dkey] = true
  166. } else {
  167. break
  168. }
  169. tmp_last = make(map[string]interface{})
  170. }
  171. log.Println("load history 后:", m)
  172. return hm
  173. }
  174. func NewInfo(tmp map[string]interface{}) *Info {
  175. subtype := qutil.ObjToString(tmp["subtype"])
  176. area := qutil.ObjToString(tmp["area"])
  177. if area == "A" {
  178. area = "全国"
  179. }
  180. info := &Info{}
  181. info.id = qutil.BsonIdToSId(tmp["_id"])
  182. info.title = qutil.ObjToString(tmp["title"])
  183. info.area = area
  184. info.subtype = subtype
  185. info.buyer = qutil.ObjToString(tmp["buyer"])
  186. info.projectname = qutil.ObjToString(tmp["projectname"])
  187. info.projectcode = qutil.ObjToString(tmp["projectcode"])
  188. info.city = qutil.ObjToString(tmp["city"])
  189. info.agency = qutil.ObjToString(tmp["agency"])
  190. info.winner = qutil.ObjToString(tmp["winner"])
  191. info.budget = qutil.Float64All(tmp["budget"])
  192. info.bidamount = qutil.Float64All(tmp["bidamount"])
  193. info.publishtime = qutil.Int64All(tmp["publishtime"])
  194. info.bidopentime = qutil.Int64All(tmp["bidopentime"])
  195. info.agencyaddr = qutil.ObjToString(tmp["agencyaddr"])
  196. //info.detail = qutil.ObjToString(tmp["detail"])
  197. info.site = qutil.ObjToString(tmp["site"])
  198. info.href = qutil.ObjToString(tmp["href"])
  199. info.repeatid = qutil.ObjToString(tmp["repeatid"])
  200. info.specialWord = FilterRegTitle.MatchString(info.title)
  201. info.titleSpecialWord = FilterRegTitle_1.MatchString(info.title) || FilterRegTitle_2.MatchString(info.title)
  202. info.mergemap = *qutil.ObjToMap(tmp["merge_map"])
  203. if info.mergemap == nil {
  204. info.mergemap = make(map[string]interface{}, 0)
  205. }
  206. return info
  207. }
  208. //判重方法
  209. func (d *datamap) check(info *Info) (b bool, source *Info, reason string) {
  210. keys := []string{}
  211. d.lock.Lock()
  212. for k, _ := range d.keys { //不同时间段
  213. //...代码
  214. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area))
  215. if info.area != "全国" { //这个后续可以不要
  216. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
  217. }
  218. }
  219. d.lock.Unlock()
  220. L:
  221. for _, k := range keys {
  222. d.lock.Lock()
  223. data := d.data[k]
  224. d.lock.Unlock()
  225. if len(data) > 0 { //对比v 找到同类型,同省或全国的数据作对比
  226. for _, v := range data {
  227. reason = ""
  228. if v.id == info.id { //正常重复
  229. return false, v, ""
  230. }
  231. //类型分组
  232. if info.subtype == v.subtype {
  233. //站点配置--
  234. if info.site != "" {
  235. sitelock.Lock()
  236. dict := SiteMap[info.site]
  237. sitelock.Unlock()
  238. if dict != nil {
  239. //临时改变--具体值
  240. if info.area == "全国" && dict["area"] != "" {
  241. info.area = qutil.ObjToString(dict["area"])
  242. info.city = qutil.ObjToString(dict["city"])
  243. } else {
  244. if info.city == "" && dict["city"] != "" {
  245. info.area = qutil.ObjToString(dict["area"])
  246. info.city = qutil.ObjToString(dict["city"])
  247. }
  248. }
  249. }
  250. }
  251. //前置条件1 站点相关
  252. if info.site != "" && info.site == v.site {
  253. if info.href != "" && info.href == v.href {
  254. reason = "href相同"
  255. b = true
  256. source = v
  257. break L
  258. }
  259. if info.href != "" && info.href != v.href {
  260. reason = "href不同"
  261. }
  262. }
  263. //前置条件2 标题相关 - 有且一个关键词
  264. if ((info.titleSpecialWord && !v.titleSpecialWord) || (info.specialWord && !v.specialWord)) &&
  265. info.title != v.title && v.title != "" && info.title != "" {
  266. continue
  267. }
  268. //前置条件3 标题相关 - 均含有关键词
  269. if ((info.titleSpecialWord && v.titleSpecialWord) || (info.specialWord && v.specialWord)) &&
  270. len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && v.title != "" && info.title != "" {
  271. if !(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  272. continue //无包含关系
  273. }
  274. if strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title) {
  275. reason = "标题关键词且包含关系"
  276. b = true
  277. source = v
  278. break L
  279. }
  280. }
  281. //代理机构相同-非空相等
  282. if v.agency != "" && info.agency != "" && v.agency == info.agency {
  283. reason = reason + "同机构-"
  284. repeat := false
  285. if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
  286. b = true
  287. source = v
  288. break
  289. }
  290. } else {
  291. reason = reason + "非同机构-"
  292. if info.city != "" && info.city == v.city {
  293. reason = reason + "同城-"
  294. repeat := false
  295. if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
  296. b = true
  297. source = v
  298. break
  299. }
  300. } else {
  301. reason = reason + "不同城-"
  302. repeat := false
  303. if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
  304. b = true
  305. source = v
  306. break
  307. }
  308. }
  309. }
  310. }
  311. }
  312. }
  313. }
  314. //往预存数据 d 添加
  315. if !b {
  316. ct := info.publishtime
  317. dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
  318. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  319. d.lock.Lock()
  320. data := d.data[k]
  321. if data == nil {
  322. data = []*Info{info}
  323. d.data[k] = data
  324. if !d.keys[dkey] {
  325. d.keys[dkey] = true
  326. d.update(ct)
  327. }
  328. } else {
  329. data = append(data, info)
  330. d.data[k] = data
  331. }
  332. d.lock.Unlock()
  333. }
  334. return
  335. }
  336. func (h *historymap) checkHistory(info *Info) (b bool, source *Info, reasons string) {
  337. h.lock.Lock()
  338. defer h.lock.Unlock()
  339. keys := []string{}
  340. //不同时间段
  341. for k, _ := range h.keys {
  342. //...代码
  343. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area))
  344. if info.area != "全国" { //这个后续可以不要
  345. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
  346. }
  347. }
  348. L:
  349. for _, k := range keys {
  350. data := h.data[k]
  351. if len(data) > 0 { //对比v 找到同类型,同省或全国的数据作对比
  352. for _, v := range data {
  353. reason := ""
  354. if v.id == info.id { //正常重复
  355. return false, v, ""
  356. }
  357. //类型分组
  358. if info.subtype == v.subtype {
  359. //站点配置--
  360. if info.site != "" {
  361. dict := SiteMap[info.site]
  362. if dict != nil {
  363. //临时改变--具体值
  364. if info.area == "全国" && dict["area"] != "" {
  365. info.area = qutil.ObjToString(dict["area"])
  366. info.city = qutil.ObjToString(dict["city"])
  367. } else {
  368. if info.city == "" && dict["city"] != "" {
  369. info.area = qutil.ObjToString(dict["area"])
  370. info.city = qutil.ObjToString(dict["city"])
  371. }
  372. }
  373. }
  374. }
  375. //前置条件1 站点相关
  376. if info.site != "" && info.site == v.site {
  377. if info.href != "" && info.href == v.href {
  378. reason = "href相同"
  379. b = true
  380. source = v
  381. reasons = reason
  382. break L
  383. }
  384. if info.href != "" && info.href != v.href {
  385. reason = "href不同"
  386. }
  387. }
  388. //前置条件2 标题相关 - 有且一个关键词
  389. if ((info.titleSpecialWord && !v.titleSpecialWord) || (info.specialWord && !v.specialWord)) &&
  390. info.title != v.title && v.title != "" && info.title != "" {
  391. continue
  392. }
  393. //前置条件3 标题相关 - 均含有关键词
  394. if ((info.titleSpecialWord && v.titleSpecialWord) || (info.specialWord && v.specialWord)) &&
  395. len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && v.title != "" && info.title != "" {
  396. if !(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  397. continue //无包含关系
  398. }
  399. if strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title) {
  400. reason = "标题关键词且包含关系"
  401. b = true
  402. source = v
  403. reasons = reason
  404. break L
  405. }
  406. }
  407. //代理机构相同-非空相等
  408. if v.agency != "" && info.agency != "" && v.agency == info.agency {
  409. reason = reason + "同机构-"
  410. repeat := false
  411. if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
  412. b = true
  413. source = v
  414. break
  415. }
  416. } else {
  417. reason = reason + "非同机构-"
  418. if info.city != "" && info.city == v.city {
  419. reason = reason + "同城-"
  420. repeat := false
  421. if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
  422. b = true
  423. source = v
  424. break
  425. }
  426. } else {
  427. reason = reason + "不同城-"
  428. repeat := false
  429. if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
  430. b = true
  431. source = v
  432. break
  433. }
  434. }
  435. }
  436. }
  437. }
  438. }
  439. }
  440. //
  441. if b {
  442. //判重
  443. if info.repeatid == source.id {
  444. //重复-无变化-不处理
  445. b = false
  446. } else {
  447. if source.id != "" {
  448. //重复-有变化-覆盖记录处理
  449. }
  450. }
  451. } else {
  452. if source != nil {
  453. if source.repeatid != "" {
  454. //未判重-有变化--记录
  455. b = true
  456. reasons = "未判重记录"
  457. }
  458. }
  459. }
  460. //往预存数据 d 添加
  461. if !b {
  462. ct, _ := strconv.ParseInt(info.id[:8], 16, 64)
  463. dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
  464. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  465. data := h.data[k]
  466. if data == nil {
  467. data = []*Info{info}
  468. h.data[k] = data
  469. if !h.keys[dkey] {
  470. h.keys[dkey] = true
  471. //h.update(ct)
  472. }
  473. } else {
  474. data = append(data, info)
  475. h.data[k] = data
  476. }
  477. }
  478. return
  479. }
  480. //替换原始数据池
  481. func (d *datamap) replaceSourceData(replaceData *Info, replaceId string) {
  482. ct, _ := strconv.ParseInt(replaceId[:8], 16, 64)
  483. dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
  484. k := fmt.Sprintf("%s_%s_%s", dkey, replaceData.subtype, replaceData.area)
  485. d.lock.Lock()
  486. data := d.data[k]
  487. if data == nil {
  488. data = []*Info{replaceData}
  489. d.data[k] = data
  490. if !d.keys[dkey] {
  491. d.keys[dkey] = true
  492. d.update(ct)
  493. }
  494. } else {
  495. //遍历替换
  496. for k, v := range data {
  497. if v.id == replaceId {
  498. data[k] = replaceData
  499. break
  500. }
  501. }
  502. d.data[k] = data
  503. }
  504. d.lock.Unlock()
  505. }
  506. func (h *historymap) replaceSourceData(replaceData *Info, replaceId string) {
  507. ct, _ := strconv.ParseInt(replaceId[:8], 16, 64)
  508. dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
  509. k := fmt.Sprintf("%s_%s_%s", dkey, replaceData.subtype, replaceData.area)
  510. data := h.data[k]
  511. if data == nil {
  512. data = []*Info{replaceData}
  513. h.data[k] = data
  514. if !h.keys[dkey] {
  515. h.keys[dkey] = true
  516. //h.update(ct)
  517. }
  518. } else {
  519. //遍历替换
  520. for k, v := range data {
  521. if v.id == replaceId {
  522. data[k] = replaceData
  523. break
  524. }
  525. }
  526. h.data[k] = data
  527. }
  528. }
  529. //以下为判重 - 一揽子的方法
  530. //判重方法1
  531. func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
  532. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  533. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  534. info.subtype == "变更" || info.subtype == "其他" {
  535. //招标结果
  536. if tenderRepeat_A(v, info, reason) {
  537. if tenderRepeat_C(v, info) {
  538. return false, reason
  539. } else {
  540. reason = reason + "---招标类"
  541. return true, reason
  542. }
  543. } else {
  544. return false, reason
  545. }
  546. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  547. //中标结果
  548. if winningRepeat_A(v, info, reason) {
  549. if winningRepeat_C(v, info) {
  550. return false, reason
  551. } else {
  552. reason = reason + "---中标类"
  553. return true, reason
  554. }
  555. } else {
  556. return false, reason
  557. }
  558. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  559. //合同
  560. if contractRepeat_A(v, info, reason) {
  561. if contractRepeat_C(v, info) {
  562. return false, reason
  563. } else {
  564. reason = reason + "---合同类"
  565. return true, reason
  566. }
  567. } else {
  568. return false, reason
  569. }
  570. } else {
  571. //招标结果
  572. if tenderRepeat_A(v, info, reason) {
  573. if tenderRepeat_C(v, info) {
  574. return false, reason
  575. } else {
  576. reason = reason + "---类别空-招标类"
  577. return true, reason
  578. }
  579. } else {
  580. return false, reason
  581. }
  582. }
  583. return false, reason
  584. }
  585. //判重方法2
  586. func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
  587. //相同
  588. if v.agency == info.agency && v.agency != "" && info.agency != "" {
  589. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  590. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  591. info.subtype == "变更" || info.subtype == "其他" {
  592. //招标结果
  593. if tenderRepeat_B(v, info, reason) {
  594. if tenderRepeat_C(v, info) { //有不同
  595. return false, reason
  596. } else {
  597. reason = reason + "---招标类"
  598. return true, reason
  599. }
  600. } else {
  601. return false, reason
  602. }
  603. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  604. //中标结果
  605. if winningRepeat_B(v, info, reason) {
  606. if winningRepeat_C(v, info) { //有不同
  607. return false, reason
  608. } else {
  609. reason = reason + "---中标类"
  610. return true, reason
  611. }
  612. } else {
  613. return false, reason
  614. }
  615. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  616. //合同
  617. if contractRepeat_B(v, info, reason) {
  618. if contractRepeat_C(v, info) { //有不同
  619. return false, reason
  620. } else {
  621. reason = reason + "---合同类"
  622. return true, reason
  623. }
  624. } else {
  625. return false, reason
  626. }
  627. } else {
  628. //招标结果
  629. if tenderRepeat_B(v, info, reason) {
  630. if tenderRepeat_C(v, info) { //有不同
  631. return false, reason
  632. } else {
  633. reason = reason + "---类别空-招标类"
  634. return true, reason
  635. }
  636. } else {
  637. return false, reason
  638. }
  639. }
  640. }
  641. //不同
  642. if v.agency != info.agency && v.agency != "" && info.agency != "" {
  643. return false, reason
  644. }
  645. //机构最少一个为空
  646. if v.agency == "" || info.agency == "" {
  647. var repeat = false
  648. if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
  649. reason = reason + "---机构最少一个空"
  650. return true, reason
  651. } else {
  652. return false, reason
  653. }
  654. }
  655. return false, reason
  656. }
  657. //招标_A
  658. func tenderRepeat_A(v *Info, info *Info, reason string) bool {
  659. var ss string
  660. p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false
  661. if v.projectname != "" && v.projectname == info.projectname {
  662. ss = ss + "p1(名称)-"
  663. p1 = true
  664. }
  665. if v.buyer != "" && v.buyer == info.buyer {
  666. ss = ss + "p2(单位)-"
  667. p2 = true
  668. }
  669. if v.projectcode != "" && v.projectcode == info.projectcode {
  670. ss = ss + "p3(编号)-"
  671. p3 = true
  672. }
  673. if v.budget != 0 && v.budget == info.budget {
  674. ss = ss + "p4(预算)-"
  675. p4 = true
  676. }
  677. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  678. ss = ss + "p9(开标时间)-"
  679. p9 = true
  680. }
  681. if v.agencyaddr != "" && v.agencyaddr == info.agencyaddr {
  682. ss = ss + "p10(开标地点)-"
  683. p10 = true
  684. }
  685. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  686. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  687. ss = ss + "p11(标题)-"
  688. p11 = true
  689. }
  690. if (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) ||
  691. (p1 && p2 && p10) || (p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) ||
  692. (p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) ||
  693. (p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) ||
  694. (p2 && p4 && p9) || (p2 && p4 && p10) || (p2 && p4 && p11) ||
  695. (p3 && p4 && p9) || (p3 && p4 && p10) || (p3 && p4 && p11) ||
  696. (p4 && p9 && p10) || (p4 && p9 && p11) || (p9 && p10 && p11) {
  697. reason = reason + "满足招标A,3要素组合-" + ss + ","
  698. return true
  699. }
  700. return false
  701. }
  702. //招标_B
  703. func tenderRepeat_B(v *Info, info *Info, reason string) bool {
  704. m, n := 0, 0
  705. if v.projectname != "" && v.projectname == info.projectname {
  706. m++
  707. n++
  708. }
  709. if v.buyer != "" && v.buyer == info.buyer {
  710. m++
  711. }
  712. if v.projectcode != "" && v.projectcode == info.projectcode {
  713. m++
  714. }
  715. if v.budget != 0 && v.budget == info.budget {
  716. m++
  717. }
  718. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  719. m++
  720. }
  721. if v.agencyaddr != "" && v.agencyaddr == info.agencyaddr {
  722. m++
  723. }
  724. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  725. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  726. m++
  727. n++
  728. }
  729. if m >= 2 {
  730. if n == 2 && m == 2 {
  731. return false
  732. } else {
  733. reason = reason + "满足招标B,七选二,"
  734. return true
  735. }
  736. }
  737. return false
  738. }
  739. //招标_C
  740. func tenderRepeat_C(v *Info, info *Info) bool {
  741. if v.budget != 0 && info.budget != 0 && v.budget != info.budget {
  742. return true
  743. }
  744. //原始地址...
  745. if v.bidopentime != 0 && info.bidopentime != 0 && v.bidopentime != info.bidopentime {
  746. return true
  747. }
  748. if v.agencyaddr != "" && info.agencyaddr != "" && v.agencyaddr != info.agencyaddr {
  749. return true
  750. }
  751. return false
  752. }
  753. //中标_A
  754. func winningRepeat_A(v *Info, info *Info, reason string) bool {
  755. var ss string
  756. p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false
  757. if v.projectname != "" && v.projectname == info.projectname {
  758. ss = ss + "p1(项目名称)-"
  759. p1 = true
  760. }
  761. if v.buyer != "" && v.buyer == info.buyer {
  762. ss = ss + "p2(单位)-"
  763. p2 = true
  764. }
  765. if v.projectcode != "" && v.projectcode == info.projectcode {
  766. ss = ss + "p3(编号)-"
  767. p3 = true
  768. }
  769. if v.bidamount != 0 && v.bidamount == info.bidamount {
  770. ss = ss + "p5(中标金)-"
  771. p5 = true
  772. }
  773. if v.winner != "" && v.winner == info.winner {
  774. ss = ss + "p6(中标人)-"
  775. p6 = true
  776. }
  777. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  778. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  779. ss = ss + "p11(标题)-"
  780. p11 = true
  781. }
  782. if (p1 && p2 && p3) || (p1 && p2 && p5) || (p1 && p2 && p6) ||
  783. (p1 && p3 && p5) || (p1 && p3 && p6) || (p1 && p5 && p6) ||
  784. (p2 && p3 && p5) || (p2 && p3 && p6) || (p2 && p3 && p11) ||
  785. (p2 && p5 && p6) || (p2 && p5 && p11) || (p2 && p6 && p11) ||
  786. (p3 && p5 && p6) || (p3 && p5 && p11) || (p3 && p6 && p11) ||
  787. (p5 && p6 && p11) {
  788. reason = reason + "满足中标A,3要素组合-" + ss + ","
  789. return true
  790. }
  791. return false
  792. }
  793. //中标_B
  794. func winningRepeat_B(v *Info, info *Info, reason string) bool {
  795. m, n := 0, 0
  796. if v.projectname != "" && v.projectname == info.projectname {
  797. m++
  798. n++
  799. }
  800. if v.buyer != "" && v.buyer == info.buyer {
  801. m++
  802. }
  803. if v.projectcode != "" && v.projectcode == info.projectcode {
  804. m++
  805. }
  806. if v.bidamount != 0 && v.bidamount == info.bidamount {
  807. m++
  808. }
  809. if v.winner != "" && v.winner == info.winner {
  810. m++
  811. }
  812. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  813. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  814. m++
  815. n++
  816. }
  817. if m >= 2 {
  818. if n == 2 && m == 2 {
  819. return false
  820. } else {
  821. reason = reason + "满足中标B.六选二,"
  822. return true
  823. }
  824. }
  825. return false
  826. }
  827. //中标_C
  828. func winningRepeat_C(v *Info, info *Info) bool {
  829. if v.bidamount != 0 && info.bidamount != 0 && v.bidamount != info.bidamount {
  830. return true
  831. }
  832. if v.winner != "" && info.winner != "" && v.winner != info.winner {
  833. return true
  834. }
  835. //原始地址...
  836. return false
  837. }
  838. //合同_A
  839. func contractRepeat_A(v *Info, info *Info, reason string) bool {
  840. if tenderRepeat_A(v, info, reason) {
  841. return true
  842. }
  843. if winningRepeat_A(v, info, reason) {
  844. return true
  845. }
  846. return false
  847. }
  848. //合同_B
  849. func contractRepeat_B(v *Info, info *Info, reason string) bool {
  850. if tenderRepeat_B(v, info, reason) {
  851. return true
  852. }
  853. if winningRepeat_B(v, info, reason) {
  854. return true
  855. }
  856. return false
  857. }
  858. //合同_C
  859. func contractRepeat_C(v *Info, info *Info) bool {
  860. if tenderRepeat_C(v, info) {
  861. return true
  862. }
  863. if winningRepeat_C(v, info) {
  864. return true
  865. }
  866. return false
  867. }
  868. func (d *datamap) update(t int64) {
  869. //每天0点清除历史数据
  870. d.keymap = d.GetLatelyFiveDay(t)
  871. m := map[string]bool{}
  872. for _, v := range d.keymap {
  873. m[v] = true
  874. }
  875. all, all1 := 0, 0
  876. for k, v := range d.data {
  877. all += len(v)
  878. if !m[k[:8]] {
  879. delete(d.data, k)
  880. }
  881. }
  882. for k, _ := range d.keys {
  883. if !m[k] {
  884. delete(d.keys, k)
  885. }
  886. }
  887. for _, v := range d.data {
  888. all1 += len(v)
  889. }
  890. //log.Println("更新前后数据:", all, all1)
  891. }
  892. func (d *datamap) GetLatelyFiveDay(t int64) []string {
  893. array := make([]string, d.days)
  894. now := time.Unix(t, 0)
  895. for i := 0; i < d.days; i++ {
  896. array[i] = now.Format(qutil.Date_yyyyMMdd)
  897. now = now.AddDate(0, 0, -1)
  898. }
  899. return array
  900. }