datamap.go 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389
  1. package main
  2. import (
  3. "fmt"
  4. "log"
  5. qutil "qfw/util"
  6. "qfw/util/mongodb"
  7. "regexp"
  8. "strings"
  9. "sync"
  10. "time"
  11. )
// Info is the in-memory summary of one announcement record that the
// de-duplication pool keeps and compares against. It is populated by NewInfo
// from a raw document map.
type Info struct {
	id               string                 // record id
	title            string                 // title
	area             string                 // province
	city             string                 // city
	subtype          string                 // information type
	buyer            string                 // purchasing entity
	agency           string                 // agency
	winner           string                 // winning bidder
	budget           float64                // budget amount
	bidamount        float64                // winning bid amount
	projectname      string                 // project name
	projectcode      string                 // project code
	contractnumber   string                 // contract number
	publishtime      int64                  // publish time
	comeintime       int64                  // time the record entered the database
	bidopentime      int64                  // bid opening time
	bidopenaddress   string                 // bid opening location
	site             string                 // source site
	href             string                 // URL of the body text
	repeatid         string                 // id of the record this one repeats
	titleSpecialWord bool                   // title contains a special word
	specialWord      bool                   // special word used for the second-pass check
	mergemap         map[string]interface{} // merge record
	is_site          bool                   // whether area/city were taken from the site mapping
}
// datelimit is the width of the comparison window in seconds. The initial
// value is five days; the pool constructors in this file overwrite it.
var datelimit = float64(432000)

// sitelock is taken around reads of SiteMap in the check methods.
var sitelock sync.Mutex
// datamap is the pool used for ordinary de-duplication.
type datamap struct {
	lock     sync.Mutex         // guards the fields below
	days     int                // how many days of data to keep
	data     map[string][]*Info // buckets keyed by "<yyyyMMdd>_<subtype>_<area>"
	keymap   []string           // day strings retained by the last update call
	areakeys []string           // distinct area values seen so far
	keys     map[string]bool    // set of day strings that have at least one bucket
}
// historymap is the pool used when re-checking historical (updated) data.
// Its layout mirrors datamap.
type historymap struct {
	lock     sync.Mutex         // guards the fields below
	days     int                // how many days of data to keep
	data     map[string][]*Info // buckets keyed by "<yyyyMMdd>_<subtype>_<area>"
	keymap   []string           // NOTE(review): not written by any code visible in this chunk
	areakeys []string           // distinct area values seen so far
	keys     map[string]bool    // set of day strings that have at least one bucket
}
  58. func TimedTaskDatamap(days int,lasttime int64) *datamap {
  59. log.Println("数据池开始重新构建")
  60. datelimit = qutil.Float64All(days * 86400)
  61. dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{}, []string{},map[string]bool{}}
  62. if lasttime <0 {
  63. log.Println("数据池空数据")
  64. return dm
  65. }
  66. start := int(time.Now().Unix())
  67. sess := mgo.GetMgoConn()
  68. defer mgo.DestoryMongoConn(sess)
  69. query := map[string]interface{}{"publishtime": map[string]interface{}{
  70. "$lt": lasttime,
  71. }}
  72. log.Println("query", query)
  73. it := sess.DB(mgo.DbName).C(extract_back).Find(query).Sort("-publishtime").Iter()
  74. n, continuSum := 0, 0
  75. for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
  76. //qutil.IntAll(tmp["dataging"]) == 1
  77. if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1||qutil.IntAll(tmp["dataging"]) == 1 {
  78. } else {
  79. pt := tmp["publishtime"]
  80. pt_time := qutil.Int64All(pt)
  81. if qutil.Float64All(lasttime-pt_time) < datelimit {
  82. continuSum++
  83. info := NewInfo(tmp)
  84. dkey := qutil.FormatDateWithObj(&pt, qutil.Date_yyyyMMdd)
  85. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  86. data := dm.data[k]
  87. if data == nil {
  88. data = []*Info{}
  89. }
  90. data = append(data, info)
  91. dm.data[k] = data
  92. dm.keys[dkey] = true
  93. //添加省
  94. isAreaExist :=false
  95. for _,v:= range dm.areakeys {
  96. if v==info.area {
  97. isAreaExist = true
  98. }
  99. }
  100. if !isAreaExist {
  101. areaArr := dm.areakeys
  102. areaArr = append(areaArr,info.area)
  103. dm.areakeys = areaArr
  104. }
  105. } else {
  106. break
  107. }
  108. }
  109. if n%50000 == 0 {
  110. log.Println("current 数据池:", n, continuSum)
  111. }
  112. tmp = make(map[string]interface{})
  113. }
  114. log.Printf("数据池构建完成::%d秒,%d个\n", int(time.Now().Unix())-start, n)
  115. return dm
  116. }
  117. func NewDatamap(days int, lastid string) *datamap {
  118. datelimit = qutil.Float64All(days * 86400)
  119. dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{},[]string{}, map[string]bool{}}
  120. if lastid == "" {
  121. return dm
  122. }
  123. //初始化加载数据
  124. sess := mgo.GetMgoConn()
  125. defer mgo.DestoryMongoConn(sess)
  126. query := map[string]interface{}{"_id": map[string]interface{}{
  127. "$lte": StringTOBsonId(lastid),
  128. }}
  129. log.Println("query", query)
  130. it := sess.DB(mgo.DbName).C(extract).Find(query).Sort("-_id").Iter()
  131. now1 := int64(0)
  132. n, continuSum := 0, 0
  133. for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
  134. if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1 {
  135. continuSum++
  136. } else {
  137. pt := tmp["comeintime"]
  138. if Is_Sort {
  139. pt = tmp["publishtime"]
  140. }
  141. pt_time := qutil.Int64All(pt)
  142. if pt_time <= 0 {
  143. continue
  144. }
  145. if now1 == 0 {
  146. now1 = pt_time
  147. }
  148. if qutil.Float64All(now1-pt_time) < datelimit {
  149. info := NewInfo(tmp)
  150. dkey := qutil.FormatDateWithObj(&pt, qutil.Date_yyyyMMdd)
  151. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  152. data := dm.data[k]
  153. if data == nil {
  154. data = []*Info{}
  155. }
  156. data = append(data, info)
  157. dm.data[k] = data
  158. dm.keys[dkey] = true
  159. //添加省
  160. isAreaExist :=false
  161. for _,v:= range dm.areakeys {
  162. if v==info.area {
  163. isAreaExist = true
  164. }
  165. }
  166. if !isAreaExist {
  167. areaArr := dm.areakeys
  168. areaArr = append(areaArr,info.area)
  169. dm.areakeys = areaArr
  170. }
  171. } else {
  172. break
  173. }
  174. }
  175. if n%5000 == 0 {
  176. log.Println("current n:", n, continuSum)
  177. }
  178. tmp = make(map[string]interface{})
  179. }
  180. log.Println("load data:", n)
  181. return dm
  182. }
  183. //构建新历史数据池
  184. func NewHistorymap(startid string, lastid string, startTime int64, lastTime int64) *historymap {
  185. datelimit = qutil.Float64All(5 * 86400)
  186. hm := &historymap{sync.Mutex{}, 5, map[string][]*Info{}, []string{},[]string{}, map[string]bool{}}
  187. if lastid == "" || startid == "" {
  188. return hm
  189. }
  190. //取startid之前5天
  191. sess_start := mgo.GetMgoConn()
  192. defer mgo.DestoryMongoConn(sess_start) //lte gte
  193. it_start := sess_start.DB(mgo.DbName).C(extract).Find(mongodb.ObjToMQ(`{"_id":{"$lte":"`+startid+`"}}`,
  194. true)).Sort("-_id").Iter()
  195. m, n := 0, 0
  196. for tmp_start := make(map[string]interface{}); it_start.Next(&tmp_start); {
  197. if qutil.IntAll(tmp_start["repeat"]) == 1||qutil.IntAll(tmp_start["repeat"]) == -1 {
  198. continue
  199. }
  200. pt_s := tmp_start["comeintime"]
  201. if Is_Sort {
  202. pt_s = tmp_start["publishtime"]
  203. }
  204. pt_time := qutil.Int64All(pt_s)
  205. if pt_time <= 0 {
  206. continue
  207. }
  208. if qutil.Float64All(startTime-pt_time) <= datelimit {
  209. n++
  210. info := NewInfo(tmp_start)
  211. dkey := qutil.FormatDateWithObj(&pt_s, qutil.Date_yyyyMMdd)
  212. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  213. data := hm.data[k]
  214. if data == nil {
  215. data = []*Info{}
  216. }
  217. data = append(data, info)
  218. hm.data[k] = data
  219. hm.keys[dkey] = true
  220. //添加省
  221. isAreaExist :=false
  222. for _,v:= range hm.areakeys {
  223. if v==info.area {
  224. isAreaExist = true
  225. }
  226. }
  227. if !isAreaExist {
  228. areaArr := hm.areakeys
  229. areaArr = append(areaArr,info.area)
  230. hm.areakeys = areaArr
  231. }
  232. } else {
  233. break
  234. }
  235. tmp_start = make(map[string]interface{})
  236. }
  237. log.Println("load history 前:", n)
  238. //取lastid之后5天
  239. sess_last := mgo.GetMgoConn()
  240. defer mgo.DestoryMongoConn(sess_last) //lte gte
  241. it_last := sess_last.DB(mgo.DbName).C(extract).Find(mongodb.ObjToMQ(`{"_id":{"$gte":"`+lastid+`"}}`,
  242. true)).Sort("_id").Iter()
  243. for tmp_last := make(map[string]interface{}); it_last.Next(&tmp_last); {
  244. if qutil.IntAll(tmp_last["repeat"]) == 1||qutil.IntAll(tmp_last["repeat"]) == -1 {
  245. continue
  246. }
  247. pt_l := tmp_last["comeintime"]
  248. if Is_Sort {
  249. pt_l = tmp_last["publishtime"]
  250. }
  251. pt_time := qutil.Int64All(pt_l)
  252. if pt_time <= 0 {
  253. continue
  254. }
  255. if qutil.Float64All(pt_time-lastTime) <= datelimit {
  256. m++
  257. info := NewInfo(tmp_last)
  258. dkey := qutil.FormatDateWithObj(&pt_l, qutil.Date_yyyyMMdd)
  259. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  260. data := hm.data[k]
  261. if data == nil {
  262. data = []*Info{}
  263. }
  264. data = append(data, info)
  265. hm.data[k] = data
  266. hm.keys[dkey] = true
  267. //添加省
  268. isAreaExist :=false
  269. for _,v:= range hm.areakeys {
  270. if v==info.area {
  271. isAreaExist = true
  272. }
  273. }
  274. if !isAreaExist {
  275. areaArr := hm.areakeys
  276. areaArr = append(areaArr,info.area)
  277. hm.areakeys = areaArr
  278. }
  279. } else {
  280. break
  281. }
  282. tmp_last = make(map[string]interface{})
  283. }
  284. log.Println("load history 后:", m)
  285. return hm
  286. }
  287. func NewInfo(tmp map[string]interface{}) *Info {
  288. subtype := qutil.ObjToString(tmp["subtype"])
  289. area := qutil.ObjToString(tmp["area"])
  290. if area == "A" {
  291. area = "全国"
  292. }
  293. info := &Info{}
  294. info.id = BsonTOStringId(tmp["_id"])
  295. info.title = qutil.ObjToString(tmp["title"])
  296. info.area = area
  297. info.subtype = subtype
  298. info.buyer = qutil.ObjToString(tmp["buyer"])
  299. info.projectname = qutil.ObjToString(tmp["projectname"])
  300. info.contractnumber = qutil.ObjToString(tmp["contractnumber"])
  301. info.projectcode = qutil.ObjToString(tmp["projectcode"])
  302. info.city = qutil.ObjToString(tmp["city"])
  303. info.agency = qutil.ObjToString(tmp["agency"])
  304. info.winner = qutil.ObjToString(tmp["winner"])
  305. info.budget = qutil.Float64All(tmp["budget"])
  306. info.bidamount = qutil.Float64All(tmp["bidamount"])
  307. info.publishtime = qutil.Int64All(tmp["publishtime"])
  308. info.comeintime = qutil.Int64All(tmp["comeintime"])
  309. info.bidopentime = qutil.Int64All(tmp["bidopentime"])
  310. info.bidopenaddress = qutil.ObjToString(tmp["bidopenaddress"])
  311. info.site = qutil.ObjToString(tmp["site"])
  312. info.href = qutil.ObjToString(tmp["href"])
  313. info.repeatid = qutil.ObjToString(tmp["repeatid"])
  314. info.specialWord = FilterRegTitle.MatchString(info.title)
  315. info.titleSpecialWord = FilterRegTitle_0.MatchString(info.title) ||FilterRegTitle_1.MatchString(info.title) || FilterRegTitle_2.MatchString(info.title)
  316. info.mergemap = *qutil.ObjToMap(tmp["merge_map"])
  317. if info.mergemap == nil {
  318. info.mergemap = make(map[string]interface{}, 0)
  319. }
  320. info.is_site = false
  321. return info
  322. }
  323. //判重方法
  324. func (d *datamap) check(info *Info) (b bool, source *Info, reasons string) {
  325. reason := ""
  326. keys := []string{}
  327. d.lock.Lock()
  328. for k, _ := range d.keys { //不同时间段
  329. if info.area=="全国" {
  330. //匹配所有省
  331. for _,v := range d.areakeys{
  332. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, v))
  333. }
  334. }else {
  335. //匹配指定省
  336. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area))
  337. }
  338. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
  339. }
  340. d.lock.Unlock()
  341. L:
  342. for _, k := range keys {
  343. d.lock.Lock()
  344. data := d.data[k]
  345. d.lock.Unlock()
  346. if len(data) > 0 { //对比v 找到同类型,同省或全国的数据作对比
  347. //log.Println(info.area,info.subtype,k)
  348. for _, v := range data {
  349. reason = ""
  350. if v.id == info.id { //正常重复
  351. //log.Println("相同id",info.id)
  352. return false, v, ""
  353. }
  354. //if v.id == "5c761a4fa5cb26b9b73d9512" &&info.id=="5c767bd1a5cb26b9b7a61597" {
  355. // log.Println("测试数据")
  356. //}
  357. if info.subtype == v.subtype {
  358. if info.site != "" {
  359. sitelock.Lock()
  360. dict := SiteMap[info.site]
  361. sitelock.Unlock()
  362. if dict != nil {
  363. if info.area == "全国" && dict["area"] != "" {
  364. info.is_site = true
  365. info.area = qutil.ObjToString(dict["area"])
  366. info.city = qutil.ObjToString(dict["city"])
  367. } else {
  368. if info.city == "" && dict["city"] != "" {
  369. info.is_site = true
  370. info.area = qutil.ObjToString(dict["area"])
  371. info.city = qutil.ObjToString(dict["city"])
  372. }
  373. }
  374. }
  375. }
  376. //前置条件1 - 站点相关
  377. if info.site != "" && info.site == v.site {
  378. if info.href != "" && info.href == v.href {
  379. reason = "href相同"
  380. b = true
  381. source = v
  382. reasons = reason
  383. break L
  384. }
  385. if info.href != "" && info.href != v.href {
  386. reason = "href不同-"
  387. }
  388. }
  389. //前置条件2 - 标题相关,有且一个关键词
  390. if ((info.titleSpecialWord && !v.titleSpecialWord) || (info.specialWord && !v.specialWord)) &&
  391. info.title != v.title && v.title != "" && info.title != "" {
  392. continue
  393. }
  394. //前置条件3 - 标题相关,均含有关键词
  395. if ((info.titleSpecialWord && v.titleSpecialWord) || (info.specialWord && v.specialWord)) &&
  396. len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && v.title != "" && info.title != "" {
  397. if !(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  398. continue //无包含关系
  399. }
  400. if strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title) {
  401. reason = reason + "标题关键词且包含关系"
  402. //继续二级金额判断
  403. if !againRepeat(v, info) {
  404. b = true
  405. source = v
  406. reasons = reason
  407. break
  408. }
  409. }
  410. }
  411. //新增快速数据过少判重
  412. if LowHeavy {
  413. repeat := false
  414. if repeat, reason = fastLowQualityHeavy(v, info, reason); repeat {
  415. b = true
  416. source = v
  417. reasons = reason
  418. break
  419. }
  420. }
  421. //代理机构相同-非空相等
  422. if v.agency != "" && info.agency != "" && v.agency == info.agency {
  423. reason = reason + "同机构-"
  424. repeat := false
  425. if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
  426. b = true
  427. source = v
  428. reasons = reason
  429. break
  430. }
  431. } else {
  432. reason = reason + "非同机构-"
  433. if info.city != "" && info.city == v.city {
  434. reason = reason + "同城-"
  435. repeat := false
  436. if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
  437. b = true
  438. source = v
  439. reasons = reason
  440. break
  441. }
  442. } else {
  443. reason = reason + "不同城-"
  444. repeat := false
  445. if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
  446. b = true
  447. source = v
  448. reasons = reason
  449. break
  450. }
  451. }
  452. }
  453. }
  454. }
  455. }
  456. }
  457. //往预存数据 d 添加
  458. if !b {
  459. ct := info.comeintime
  460. if Is_Sort ||TimingTask{
  461. ct = info.publishtime
  462. }
  463. dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
  464. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  465. d.lock.Lock()
  466. data := d.data[k]
  467. if data == nil {
  468. data = []*Info{info}
  469. d.data[k] = data
  470. if !d.keys[dkey] {
  471. d.keys[dkey] = true
  472. d.update(ct)
  473. }
  474. } else {
  475. data = append(data, info)
  476. d.data[k] = data
  477. }
  478. //添加省
  479. isAreaExist :=false
  480. for _,v:= range d.areakeys {
  481. if v==info.area {
  482. isAreaExist = true
  483. }
  484. }
  485. if !isAreaExist {
  486. areaArr := d.areakeys
  487. areaArr = append(areaArr,info.area)
  488. d.areakeys = areaArr
  489. }
  490. d.lock.Unlock()
  491. }
  492. return
  493. }
  494. func (h *historymap) checkHistory(info *Info) (b bool, source *Info, reasons string) {
  495. reason := ""
  496. keys := []string{}
  497. h.lock.Lock()
  498. for k, _ := range h.keys { //不同时间段
  499. if info.area=="全国" {
  500. //匹配所有省
  501. for _,v := range h.areakeys{
  502. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, v))
  503. }
  504. }else {
  505. //匹配指定省
  506. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area))
  507. }
  508. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
  509. }
  510. h.lock.Unlock()
  511. L:
  512. for _, k := range keys {
  513. h.lock.Lock()
  514. data := h.data[k]
  515. h.lock.Unlock()
  516. if len(data) > 0 { //对比v 找到同类型,同省或全国的数据作对比
  517. for _, v := range data {
  518. reason = ""
  519. if v.id == info.id { //正常重复
  520. return false, v, ""
  521. }
  522. if info.subtype == v.subtype {
  523. if info.site != "" {
  524. sitelock.Lock()
  525. dict := SiteMap[info.site]
  526. sitelock.Unlock()
  527. if dict != nil {
  528. if info.area == "全国" && dict["area"] != "" {
  529. info.area = qutil.ObjToString(dict["area"])
  530. info.city = qutil.ObjToString(dict["city"])
  531. } else {
  532. if info.city == "" && dict["city"] != "" {
  533. info.area = qutil.ObjToString(dict["area"])
  534. info.city = qutil.ObjToString(dict["city"])
  535. }
  536. }
  537. }
  538. }
  539. //前置条件1 - 站点相关
  540. if info.site != "" && info.site == v.site {
  541. if info.href != "" && info.href == v.href {
  542. reason = "href相同"
  543. b = true
  544. source = v
  545. reasons = reason
  546. break L
  547. }
  548. if info.href != "" && info.href != v.href {
  549. reason = "href不同-"
  550. }
  551. }
  552. //前置条件2 - 标题相关,有且一个关键词
  553. if ((info.titleSpecialWord && !v.titleSpecialWord) || (info.specialWord && !v.specialWord)) &&
  554. info.title != v.title && v.title != "" && info.title != "" {
  555. continue
  556. }
  557. //前置条件3 - 标题相关,均含有关键词
  558. if ((info.titleSpecialWord && v.titleSpecialWord) || (info.specialWord && v.specialWord)) &&
  559. len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && v.title != "" && info.title != "" {
  560. if !(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  561. continue //无包含关系
  562. }
  563. if strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title) {
  564. reason = reason + "标题关键词且包含关系"
  565. //继续二级金额判断
  566. if !againRepeat(v, info) {
  567. b = true
  568. source = v
  569. reasons = reason
  570. break
  571. }
  572. }
  573. }
  574. //新增快速数据过少判重
  575. if LowHeavy {
  576. repeat := false
  577. if repeat, reason = fastLowQualityHeavy(v, info, reason); repeat {
  578. b = true
  579. source = v
  580. reasons = reason
  581. break
  582. }
  583. }
  584. //代理机构相同-非空相等
  585. if v.agency != "" && info.agency != "" && v.agency == info.agency {
  586. reason = reason + "同机构-"
  587. repeat := false
  588. if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
  589. b = true
  590. source = v
  591. reasons = reason
  592. break
  593. }
  594. } else {
  595. reason = reason + "非同机构-"
  596. if info.city != "" && info.city == v.city {
  597. reason = reason + "同城-"
  598. repeat := false
  599. if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
  600. b = true
  601. source = v
  602. reasons = reason
  603. break
  604. }
  605. } else {
  606. reason = reason + "不同城-"
  607. repeat := false
  608. if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
  609. b = true
  610. source = v
  611. reasons = reason
  612. break
  613. }
  614. }
  615. }
  616. }
  617. }
  618. }
  619. }
  620. //
  621. if b {
  622. if info.repeatid == source.id {
  623. b = false //重复-无变化-不处理
  624. }
  625. } else {
  626. if source != nil {
  627. if source.repeatid != "" { //未判重-有变化--记录
  628. b = true
  629. reason = "未判重记录"
  630. reasons = reason
  631. }
  632. }
  633. }
  634. //往预存数据 d 添加
  635. if !b {
  636. ct := info.comeintime
  637. if Is_Sort {
  638. ct = info.publishtime
  639. }
  640. dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
  641. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  642. data := h.data[k]
  643. if data == nil {
  644. data = []*Info{info}
  645. h.data[k] = data
  646. if !h.keys[dkey] {
  647. h.keys[dkey] = true
  648. //h.update(ct)
  649. }
  650. } else {
  651. data = append(data, info)
  652. h.data[k] = data
  653. }
  654. //添加省
  655. isAreaExist :=false
  656. for _,v:= range h.areakeys {
  657. if v==info.area {
  658. isAreaExist = true
  659. }
  660. }
  661. if !isAreaExist {
  662. areaArr := h.areakeys
  663. areaArr = append(areaArr,info.area)
  664. h.areakeys = areaArr
  665. }
  666. }
  667. return
  668. }
  669. //替换原始数据池
  670. func (d *datamap) replaceSourceData(replaceData *Info, replaceId string) {
  671. ct := replaceData.comeintime
  672. if Is_Sort||TimingTask {
  673. ct = replaceData.publishtime
  674. }
  675. dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
  676. k := fmt.Sprintf("%s_%s_%s", dkey, replaceData.subtype, replaceData.area)
  677. d.lock.Lock()
  678. data := d.data[k]
  679. if data == nil {
  680. data = []*Info{replaceData}
  681. d.data[k] = data
  682. if !d.keys[dkey] {
  683. d.keys[dkey] = true
  684. }
  685. } else {
  686. //遍历替换
  687. for k, v := range data {
  688. if v.id == replaceId {
  689. data[k] = replaceData
  690. break
  691. }
  692. }
  693. d.data[k] = data
  694. }
  695. d.lock.Unlock()
  696. }
  697. func (h *historymap) replaceSourceData(replaceData *Info, replaceId string) {
  698. ct := replaceData.comeintime
  699. if Is_Sort {
  700. ct = replaceData.publishtime
  701. }
  702. dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
  703. k := fmt.Sprintf("%s_%s_%s", dkey, replaceData.subtype, replaceData.area)
  704. h.lock.Lock()
  705. data := h.data[k]
  706. if data == nil {
  707. data = []*Info{replaceData}
  708. h.data[k] = data
  709. if !h.keys[dkey] {
  710. h.keys[dkey] = true
  711. }
  712. } else {
  713. //遍历替换
  714. for k, v := range data {
  715. if v.id == replaceId {
  716. data[k] = replaceData
  717. break
  718. }
  719. }
  720. h.data[k] = data
  721. }
  722. h.lock.Unlock()
  723. }
  724. func (d *datamap) update(t int64) {
  725. //每天0点清除历史数据
  726. d.keymap = d.GetLatelyFiveDay(t)
  727. m := map[string]bool{}
  728. for _, v := range d.keymap {
  729. m[v] = true
  730. }
  731. all, all1 := 0, 0
  732. for k, v := range d.data {
  733. all += len(v)
  734. if !m[k[:8]] {
  735. delete(d.data, k)
  736. }
  737. }
  738. for k, _ := range d.keys {
  739. if !m[k] {
  740. delete(d.keys, k)
  741. }
  742. }
  743. for _, v := range d.data {
  744. all1 += len(v)
  745. }
  746. //log.Println("更新前后数据:", all, all1)
  747. }
  748. func (d *datamap) GetLatelyFiveDay(t int64) []string {
  749. array := make([]string, d.days)
  750. now := time.Unix(t, 0)
  751. for i := 0; i < d.days; i++ {
  752. array[i] = now.Format(qutil.Date_yyyyMMdd)
  753. now = now.AddDate(0, 0, -1)
  754. }
  755. return array
  756. }
  757. /*
  758. **************************
  759. ******** 以下为判重 ********
  760. **************************
  761. */
  762. //快速低质量数据判重
  763. func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
  764. //首先判定是否为低质量数据 info目标数据
  765. if info.agency==v.agency&&info.title!=""&&
  766. info.title==v.title &&
  767. info.projectname==""&&info.projectcode==""&&info.contractnumber==""&&info.buyer=="" {
  768. isValue:=0//五要素判断
  769. if info.budget != 0 {//预算
  770. isValue++
  771. }
  772. if info.bidopentime != 0{//开标时间
  773. isValue++
  774. }
  775. if info.bidopenaddress!=""{//开标地点
  776. isValue++
  777. }
  778. if info.winner != ""{//中标单位
  779. isValue++
  780. }
  781. if info.bidamount != 0 {//中标金额
  782. isValue++
  783. }
  784. if isValue==0 {
  785. //if info.site!=v.site {
  786. // log.Println("符合低质量条件条件0",info.id,"--",v.id)
  787. //}
  788. //log.Println("符合低质量条件条件0",info.id,"--",v.id)
  789. reason = reason + "---要素均为空,标题包含关系"
  790. return true, reason
  791. }else if isValue==1 {
  792. isMeet := false
  793. if isMeet, reason = judgeLowQualityData(v, info, reason); isMeet {
  794. //log.Println("符合低质量条件条件1",info.id,"--",v.id)
  795. reason = reason + "---有且一个要素组合"
  796. return true, reason
  797. }
  798. }else {
  799. }
  800. }
  801. return false,reason
  802. }
  803. //类别细节原因记录
  804. func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
  805. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  806. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  807. info.subtype == "变更" || info.subtype == "其他" {
  808. //招标结果
  809. if info.budget != 0 && info.budget == v.budget{//预算
  810. reason = reason + "---招标类:预算"
  811. return true,reason
  812. }
  813. if info.bidopentime != 0 && info.bidopentime == v.bidopentime{//开标时间
  814. reason = reason + "---招标类:开标时间"
  815. return true,reason
  816. }
  817. if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
  818. reason = reason + "---招标类:开标地点"
  819. return true,reason
  820. }
  821. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  822. //中标结果
  823. if v.winner != "" && info.winner == v.winner{//中标单位
  824. reason = reason + "---中标类:中标单位"
  825. return true,reason
  826. }
  827. if v.bidamount != 0 && info.bidamount == v.bidamount{//中标金额
  828. reason = reason + "---中标类:中标金额"
  829. return true,reason
  830. }
  831. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  832. //合同
  833. if info.budget != 0 && info.budget == v.budget{//预算
  834. reason = reason + "---合同类:预算"
  835. return true,reason
  836. }
  837. if info.bidopentime != 0 && info.bidopentime == v.bidopentime{//开标时间
  838. reason = reason + "---合同类:开标时间"
  839. return true,reason
  840. }
  841. if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
  842. reason = reason + "---合同类:开标地点"
  843. return true,reason
  844. }
  845. if v.winner != "" && info.winner == v.winner{//中标单位
  846. reason = reason + "---合同类:中标单位"
  847. return true,reason
  848. }
  849. if v.bidamount != 0 && info.bidamount == v.bidamount{//中标金额
  850. reason = reason + "---合同类:中标金额"
  851. return true,reason
  852. }
  853. } else {
  854. //招标结果
  855. if info.budget != 0 && info.budget == v.budget{//预算
  856. reason = reason + "---类别空-招标类:预算"
  857. return true,reason
  858. }
  859. if info.bidopentime != 0 && info.bidopentime == v.bidopentime{//开标时间
  860. reason = reason + "---类别空-招标类:开标时间"
  861. return true,reason
  862. }
  863. if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
  864. reason = reason + "---类别空-招标类:开标地点"
  865. return true,reason
  866. }
  867. }
  868. return false,reason
  869. }
  870. //判重方法1
  871. func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
  872. isMeet := false
  873. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  874. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  875. info.subtype == "变更" || info.subtype == "其他" {
  876. //招标结果
  877. if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
  878. if tenderRepeat_C(v, info) {
  879. return false, reason
  880. } else {
  881. reason = reason + "---招标类"
  882. return true, reason
  883. }
  884. } else {
  885. return false, reason
  886. }
  887. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  888. //中标结果
  889. if isMeet, reason = winningRepeat_A(v, info, reason); isMeet {
  890. if winningRepeat_C(v, info) {
  891. return false, reason
  892. } else {
  893. reason = reason + "---中标类"
  894. return true, reason
  895. }
  896. } else {
  897. return false, reason
  898. }
  899. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  900. //合同
  901. if isMeet, reason = contractRepeat_A(v, info, reason); isMeet {
  902. if contractRepeat_C(v, info) {
  903. return false, reason
  904. } else {
  905. reason = reason + "---合同类"
  906. return true, reason
  907. }
  908. } else {
  909. return false, reason
  910. }
  911. } else {
  912. //招标结果
  913. if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
  914. if tenderRepeat_C(v, info) {
  915. return false, reason
  916. } else {
  917. reason = reason + "---类别空-招标类"
  918. return true, reason
  919. }
  920. } else {
  921. return false, reason
  922. }
  923. }
  924. return false, reason
  925. }
  926. //判重方法2
  927. func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
  928. isMeet := false
  929. if v.agency == info.agency && v.agency != "" && info.agency != "" {
  930. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  931. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  932. info.subtype == "变更" || info.subtype == "其他" {
  933. //招标结果
  934. if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
  935. if tenderRepeat_C(v, info) { //有不同
  936. return false, reason
  937. } else {
  938. reason = reason + "---招标类"
  939. return true, reason
  940. }
  941. } else {
  942. return false, reason
  943. }
  944. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  945. //中标结果
  946. if isMeet, reason = winningRepeat_B(v, info, reason); isMeet {
  947. if winningRepeat_C(v, info) { //有不同
  948. return false, reason
  949. } else {
  950. reason = reason + "---中标类"
  951. return true, reason
  952. }
  953. } else {
  954. return false, reason
  955. }
  956. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  957. //合同
  958. if isMeet, reason = contractRepeat_B(v, info, reason); isMeet {
  959. if contractRepeat_C(v, info) { //有不同
  960. return false, reason
  961. } else {
  962. reason = reason + "---合同类"
  963. return true, reason
  964. }
  965. } else {
  966. return false, reason
  967. }
  968. } else {
  969. //招标结果
  970. if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
  971. if tenderRepeat_C(v, info) { //有不同
  972. return false, reason
  973. } else {
  974. reason = reason + "---类别空-招标类"
  975. return true, reason
  976. }
  977. } else {
  978. return false, reason
  979. }
  980. }
  981. }
  982. //不同
  983. if v.agency != info.agency && v.agency != "" && info.agency != "" {
  984. return false, reason
  985. }
  986. //机构最少一个为空
  987. if v.agency == "" || info.agency == "" {
  988. var repeat = false
  989. if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
  990. reason = reason + "---机构最少一个空"
  991. return true, reason
  992. } else {
  993. return false, reason
  994. }
  995. }
  996. return false, reason
  997. }
  998. //招标_A
  999. func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  1000. var ss string
  1001. p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false
  1002. if v.projectname != "" && v.projectname == info.projectname {
  1003. ss = ss + "p1(名称)-"
  1004. p1 = true
  1005. }
  1006. if v.buyer != "" && v.buyer == info.buyer {
  1007. ss = ss + "p2(单位)-"
  1008. p2 = true
  1009. }
  1010. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  1011. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  1012. ss = ss + "p3(编号组)-"
  1013. p3 = true
  1014. }
  1015. if v.budget != 0 && v.budget == info.budget {
  1016. ss = ss + "p4(预算)-"
  1017. p4 = true
  1018. }
  1019. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  1020. ss = ss + "p9(开标时间)-"
  1021. p9 = true
  1022. }
  1023. if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
  1024. ss = ss + "p10(开标地点)-"
  1025. p10 = true
  1026. }
  1027. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  1028. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  1029. ss = ss + "p11(标题)-"
  1030. p11 = true
  1031. }
  1032. if (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) ||
  1033. (p1 && p2 && p10) || (p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) ||
  1034. (p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) ||
  1035. (p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) ||
  1036. (p2 && p4 && p9) || (p2 && p4 && p10) || (p2 && p4 && p11) ||
  1037. (p3 && p4 && p9) || (p3 && p4 && p10) || (p3 && p4 && p11) ||
  1038. (p4 && p9 && p10) || (p4 && p9 && p11) || (p9 && p10 && p11) {
  1039. reason = reason + "满足招标A,3要素组合-" + ss + ","
  1040. return true, reason
  1041. }
  1042. return false, reason
  1043. }
  1044. //招标_B
  1045. func tenderRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  1046. m, n := 0, 0
  1047. if v.projectname != "" && v.projectname == info.projectname {
  1048. m++
  1049. n++
  1050. }
  1051. if v.buyer != "" && v.buyer == info.buyer {
  1052. m++
  1053. }
  1054. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  1055. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  1056. m++
  1057. }
  1058. if v.budget != 0 && v.budget == info.budget {
  1059. m++
  1060. }
  1061. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  1062. m++
  1063. }
  1064. //if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
  1065. // m++
  1066. //}
  1067. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  1068. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  1069. m++
  1070. n++
  1071. }
  1072. if m >= 2 {
  1073. if n == 2 && m == 2 {
  1074. return false, reason
  1075. } else {
  1076. reason = reason + "满足招标B,六选二,"
  1077. return true, reason
  1078. }
  1079. }
  1080. return false, reason
  1081. }
  1082. //招标_C
  1083. func tenderRepeat_C(v *Info, info *Info) bool {
  1084. if v.budget != 0 && info.budget != 0 && v.budget != info.budget {
  1085. return true
  1086. }
  1087. //原始地址...
  1088. if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
  1089. return true
  1090. }
  1091. if v.bidopentime != 0 && info.bidopentime != 0 && v.bidopentime != info.bidopentime {
  1092. return true
  1093. }
  1094. if v.bidopenaddress != "" && info.bidopenaddress != "" && v.bidopenaddress != info.bidopenaddress {
  1095. return true
  1096. }
  1097. return false
  1098. }
  1099. //中标_A
  1100. func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  1101. var ss string
  1102. p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false
  1103. if v.projectname != "" && v.projectname == info.projectname {
  1104. ss = ss + "p1(项目名称)-"
  1105. p1 = true
  1106. }
  1107. if v.buyer != "" && v.buyer == info.buyer {
  1108. ss = ss + "p2(单位)-"
  1109. p2 = true
  1110. }
  1111. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  1112. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  1113. ss = ss + "p3(编号组)-"
  1114. p3 = true
  1115. }
  1116. //if v.bidamount != 0 && v.bidamount == info.bidamount {
  1117. // ss = ss + "p5(中标金)-"
  1118. // p5 = true
  1119. //}
  1120. //if v.winner != "" && v.winner == info.winner {
  1121. // ss = ss + "p6(中标人)-"
  1122. // p6 = true
  1123. //}
  1124. if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
  1125. ss = ss + "p5(中标金)-"
  1126. p5 = true
  1127. }
  1128. if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
  1129. ss = ss + "p6(中标人)-"
  1130. p6 = true
  1131. }
  1132. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  1133. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  1134. ss = ss + "p11(标题)-"
  1135. p11 = true
  1136. }
  1137. if (p1 && p2 && p3) || (p1 && p2 && p5) || (p1 && p2 && p6) ||
  1138. (p1 && p3 && p5) || (p1 && p3 && p6) || (p1 && p5 && p6) ||
  1139. (p2 && p3 && p5) || (p2 && p3 && p6) || (p2 && p3 && p11) ||
  1140. (p2 && p5 && p6) || (p2 && p5 && p11) || (p2 && p6 && p11) ||
  1141. (p3 && p5 && p6) || (p3 && p5 && p11) || (p3 && p6 && p11) ||
  1142. (p5 && p6 && p11) {
  1143. reason = reason + "满足中标A,3要素组合-" + ss + ","
  1144. return true, reason
  1145. }
  1146. return false, reason
  1147. }
  1148. //中标_B
  1149. func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  1150. m, n := 0, 0
  1151. if v.projectname != "" && v.projectname == info.projectname {
  1152. m++
  1153. n++
  1154. }
  1155. if v.buyer != "" && v.buyer == info.buyer {
  1156. m++
  1157. }
  1158. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  1159. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  1160. m++
  1161. }
  1162. if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
  1163. m++
  1164. }
  1165. if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
  1166. m++
  1167. }
  1168. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  1169. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  1170. m++
  1171. n++
  1172. }
  1173. if m >= 2 {
  1174. if n == 2 && m == 2 {
  1175. return false, reason
  1176. } else {
  1177. reason = reason + "满足中标B.六选二,"
  1178. return true, reason
  1179. }
  1180. }
  1181. return false, reason
  1182. }
  1183. //中标_C
  1184. func winningRepeat_C(v *Info, info *Info) bool {
  1185. //if v.bidamount != 0 && info.bidamount != 0 && v.bidamount != info.bidamount {
  1186. // return true
  1187. //}
  1188. if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount,info.bidamount) {
  1189. return true
  1190. }
  1191. if v.winner != "" && info.winner != "" && deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) {
  1192. return true
  1193. }
  1194. //原始地址...
  1195. if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
  1196. return true
  1197. }
  1198. return false
  1199. }
  1200. //合同_A
  1201. func contractRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  1202. isMeet_1 := false
  1203. if isMeet_1, reason = tenderRepeat_A(v, info, reason); isMeet_1 {
  1204. return true, reason
  1205. }
  1206. isMeet_2 := false
  1207. if isMeet_2, reason = winningRepeat_A(v, info, reason); isMeet_2 {
  1208. return true, reason
  1209. }
  1210. return false, reason
  1211. }
  1212. //合同_B
  1213. func contractRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  1214. isMeet_1 := false
  1215. if isMeet_1, reason = tenderRepeat_B(v, info, reason); isMeet_1 {
  1216. return true, reason
  1217. }
  1218. isMeet_2 := false
  1219. if isMeet_2, reason = winningRepeat_B(v, info, reason); isMeet_2 {
  1220. return true, reason
  1221. }
  1222. return false, reason
  1223. }
  1224. //合同_C
  1225. func contractRepeat_C(v *Info, info *Info) bool {
  1226. if tenderRepeat_C(v, info) {
  1227. return true
  1228. }
  1229. if winningRepeat_C(v, info) {
  1230. return true
  1231. }
  1232. return false
  1233. }
  1234. //再次金额判断
  1235. func againRepeat(v *Info, info *Info) bool {
  1236. //相同采购单位下
  1237. if info.buyer != "" && v.buyer == info.buyer {
  1238. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  1239. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  1240. info.subtype == "其他" || info.subtype == "变更" {
  1241. //预算金额满足条件
  1242. if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
  1243. return true
  1244. }
  1245. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" ||
  1246. info.subtype == "流标" || info.subtype == "合同" || info.subtype == "验收" ||
  1247. info.subtype == "违规" {
  1248. //中标金额单位满足条件
  1249. if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
  1250. (deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
  1251. return true
  1252. }
  1253. } else {
  1254. }
  1255. }
  1256. return false
  1257. }
  // extraSpaceRe matches a run of two or more whitespace characters. It is
  // compiled once at package initialisation instead of on every call.
  var extraSpaceRe = regexp.MustCompile(`\s{2,}`)

  // deleteExtraSpace normalises the whitespace inside a winner name so that two
  // names differing only in spacing compare equal.
  //
  // Every tab is first turned into a space. Then each run of two or more
  // whitespace characters is cut down to its first character. Leading and
  // trailing whitespace is not trimmed, only collapsed.
  func deleteExtraSpace(s string) string {
  	// Use an explicit "\t" escape so the tab replacement cannot silently
  	// degrade into a space-for-space no-op.
  	s = strings.Replace(s, "\t", " ", -1)
  	// Every character matched by \s is a single byte, so run[:1] is the first
  	// whitespace character of the run. One pass is enough: a collapsed run is
  	// followed by a non-whitespace character and cannot form a new match.
  	return extraSpaceRe.ReplaceAllStringFunc(s, func(run string) string {
  		return run[:1]
  	})
  }
  // isBidWinningAmount reports whether two winning-bid amounts DIFFER. Despite
  // the name, false means the amounts are considered the same.
  //
  // Two amounts are the same when they are equal, or when one equals the other
  // multiplied by 10000 (presumably one value is in yuan and the other in units
  // of 10,000 yuan; confirm against the data source).
  //
  // The comparison allows a tiny relative tolerance. Exact float64 equality
  // would miss equal amounts whenever the multiplication rounds, for example
  // 0.07*10000 evaluates to 700.0000000000001, not 700.
  func isBidWinningAmount(f1 float64, f2 float64) bool {
  	const (
  		unitRatio = 10000 // multiplier between the two amount units
  		relTol    = 1e-12 // far above float64 rounding error, far below a cent
  	)
  	same := func(a, b float64) bool {
  		if a == b { // exact hits, and keeps equal infinities equal
  			return true
  		}
  		diff, scale, other := a-b, a, b
  		if diff < 0 {
  			diff = -diff
  		}
  		if scale < 0 {
  			scale = -scale
  		}
  		if other < 0 {
  			other = -other
  		}
  		if other > scale {
  			scale = other
  		}
  		// NaN operands make this comparison false, so they count as different.
  		return diff <= relTol*scale
  	}
  	if same(f1, f2) || same(f1*unitRatio, f2) || same(f2*unitRatio, f1) {
  		return false
  	}
  	return true
  }