datamap.go 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150
  1. package main
  2. import (
  3. "fmt"
  4. "log"
  5. qutil "qfw/util"
  6. "regexp"
  7. "strings"
  8. "sync"
  9. "time"
  10. )
  11. type Info struct {
  12. id string //id
  13. title string //标题
  14. area string //省份
  15. city string //城市
  16. subtype string //信息类型
  17. buyer string //采购单位
  18. agency string //代理机构
  19. winner string //中标单位
  20. budget float64 //预算金额
  21. bidamount float64 //中标金额
  22. projectname string //项目名称
  23. projectcode string //项目编号
  24. contractnumber string //合同编号
  25. publishtime int64 //发布时间
  26. comeintime int64 //入库时间
  27. bidopentime int64 //开标时间
  28. bidopenaddress string //开标地点
  29. site string //站点
  30. href string //正文的url
  31. repeatid string //重复id
  32. titleSpecialWord bool //标题特殊词
  33. specialWord bool //再次判断的特殊词
  34. mergemap map[string]interface{} //合并记录
  35. is_site bool //是否站点城市
  36. }
  37. var datelimit = float64(432000) //五天
  38. var sitelock sync.Mutex //锁
  39. //一般数据判重
  40. type datamap struct {
  41. lock sync.Mutex //锁
  42. days int //保留几天数据
  43. data map[string][]*Info
  44. keymap []string
  45. areakeys []string
  46. keys map[string]bool
  47. }
  48. func TimedTaskDatamap(days int,lasttime int64) *datamap {
  49. log.Println("数据池开始重新构建")
  50. datelimit = qutil.Float64All(days * 86400)
  51. dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{}, []string{},map[string]bool{}}
  52. if lasttime <0 {
  53. log.Println("数据池空数据")
  54. return dm
  55. }
  56. start := int(time.Now().Unix())
  57. sess := mgo.GetMgoConn()
  58. defer mgo.DestoryMongoConn(sess)
  59. query := map[string]interface{}{"publishtime": map[string]interface{}{
  60. "$lt": lasttime,
  61. }}
  62. log.Println("query", query)
  63. it := sess.DB(mgo.DbName).C(extract_back).Find(query).Sort("-publishtime").Iter()
  64. n, continuSum := 0, 0
  65. for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
  66. //qutil.IntAll(tmp["dataging"]) == 1
  67. if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1||qutil.IntAll(tmp["dataging"]) == 1 {
  68. } else {
  69. pt := tmp["publishtime"]
  70. pt_time := qutil.Int64All(pt)
  71. if qutil.Float64All(lasttime-pt_time) < datelimit {
  72. continuSum++
  73. info := NewInfo(tmp)
  74. dkey := qutil.FormatDateWithObj(&pt, qutil.Date_yyyyMMdd)
  75. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  76. data := dm.data[k]
  77. if data == nil {
  78. data = []*Info{}
  79. }
  80. data = append(data, info)
  81. dm.data[k] = data
  82. dm.keys[dkey] = true
  83. //添加省
  84. isAreaExist :=false
  85. for _,v:= range dm.areakeys {
  86. if v==info.area {
  87. isAreaExist = true
  88. }
  89. }
  90. if !isAreaExist {
  91. areaArr := dm.areakeys
  92. areaArr = append(areaArr,info.area)
  93. dm.areakeys = areaArr
  94. }
  95. } else {
  96. break
  97. }
  98. }
  99. if n%50000 == 0 {
  100. log.Println("current 数据池:", n, continuSum)
  101. }
  102. tmp = make(map[string]interface{})
  103. }
  104. log.Printf("数据池构建完成::%d秒,%d个\n", int(time.Now().Unix())-start, n)
  105. return dm
  106. }
  107. func NewDatamap(days int, lastid string) *datamap {
  108. datelimit = qutil.Float64All(days * 86400)
  109. dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{},[]string{}, map[string]bool{}}
  110. if lastid == "" {
  111. return dm
  112. }
  113. //初始化加载数据
  114. sess := mgo.GetMgoConn()
  115. defer mgo.DestoryMongoConn(sess)
  116. query := map[string]interface{}{"_id": map[string]interface{}{
  117. "$lte": StringTOBsonId(lastid),
  118. }}
  119. log.Println("query", query)
  120. it := sess.DB(mgo.DbName).C(extract).Find(query).Sort("-_id").Iter()
  121. now1 := int64(0)
  122. n, continuSum := 0, 0
  123. for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
  124. if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1 {
  125. continuSum++
  126. } else {
  127. pt := tmp["comeintime"]
  128. if Is_Sort {
  129. pt = tmp["publishtime"]
  130. }
  131. pt_time := qutil.Int64All(pt)
  132. if pt_time <= 0 {
  133. continue
  134. }
  135. if now1 == 0 {
  136. now1 = pt_time
  137. }
  138. if qutil.Float64All(now1-pt_time) < datelimit {
  139. info := NewInfo(tmp)
  140. dkey := qutil.FormatDateWithObj(&pt, qutil.Date_yyyyMMdd)
  141. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  142. data := dm.data[k]
  143. if data == nil {
  144. data = []*Info{}
  145. }
  146. data = append(data, info)
  147. dm.data[k] = data
  148. dm.keys[dkey] = true
  149. //添加省
  150. isAreaExist :=false
  151. for _,v:= range dm.areakeys {
  152. if v==info.area {
  153. isAreaExist = true
  154. }
  155. }
  156. if !isAreaExist {
  157. areaArr := dm.areakeys
  158. areaArr = append(areaArr,info.area)
  159. dm.areakeys = areaArr
  160. }
  161. } else {
  162. break
  163. }
  164. }
  165. if n%5000 == 0 {
  166. log.Println("current n:", n, continuSum)
  167. }
  168. tmp = make(map[string]interface{})
  169. }
  170. log.Println("load data:", n)
  171. return dm
  172. }
  173. func NewInfo(tmp map[string]interface{}) *Info {
  174. subtype := qutil.ObjToString(tmp["subtype"])
  175. area := qutil.ObjToString(tmp["area"])
  176. if area == "A" {
  177. area = "全国"
  178. }
  179. info := &Info{}
  180. if IdType {
  181. info.id = qutil.ObjToString(tmp["_id"])
  182. }else {
  183. info.id = BsonTOStringId(tmp["_id"])
  184. }
  185. info.title = qutil.ObjToString(tmp["title"])
  186. info.area = area
  187. info.subtype = subtype
  188. info.buyer = qutil.ObjToString(tmp["buyer"])
  189. info.projectname = qutil.ObjToString(tmp["projectname"])
  190. info.contractnumber = qutil.ObjToString(tmp["contractnumber"])
  191. info.projectcode = qutil.ObjToString(tmp["projectcode"])
  192. info.city = qutil.ObjToString(tmp["city"])
  193. info.agency = qutil.ObjToString(tmp["agency"])
  194. info.winner = qutil.ObjToString(tmp["winner"])
  195. info.budget = qutil.Float64All(tmp["budget"])
  196. info.bidamount = qutil.Float64All(tmp["bidamount"])
  197. info.publishtime = qutil.Int64All(tmp["publishtime"])
  198. info.comeintime = qutil.Int64All(tmp["comeintime"])
  199. info.bidopentime = qutil.Int64All(tmp["bidopentime"])
  200. info.bidopenaddress = qutil.ObjToString(tmp["bidopenaddress"])
  201. info.site = qutil.ObjToString(tmp["site"])
  202. info.href = qutil.ObjToString(tmp["href"])
  203. info.repeatid = qutil.ObjToString(tmp["repeatid"])
  204. info.specialWord = FilterRegTitle.MatchString(info.title)
  205. info.titleSpecialWord = FilterRegTitle_0.MatchString(info.title) ||FilterRegTitle_1.MatchString(info.title) || FilterRegTitle_2.MatchString(info.title)
  206. info.mergemap = *qutil.ObjToMap(tmp["merge_map"])
  207. if info.mergemap == nil {
  208. info.mergemap = make(map[string]interface{}, 0)
  209. }
  210. info.is_site = false
  211. return info
  212. }
  213. //判重方法
  214. func (d *datamap) check(info *Info) (b bool, source *Info, reasons string) {
  215. reason := ""
  216. keys := []string{}
  217. d.lock.Lock()
  218. for k, _ := range d.keys { //不同时间段
  219. if info.area=="全国" {
  220. //匹配所有省
  221. for _,v := range d.areakeys{
  222. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, v))
  223. }
  224. }else {
  225. //匹配指定省
  226. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area))
  227. }
  228. keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
  229. }
  230. d.lock.Unlock()
  231. L:
  232. for _, k := range keys {
  233. d.lock.Lock()
  234. data := d.data[k]
  235. d.lock.Unlock()
  236. if len(data) > 0 { //对比v 找到同类型,同省或全国的数据作对比
  237. //log.Println(info.area,info.subtype,k)
  238. for _, v := range data {
  239. reason = ""
  240. if v.id == info.id { //正常重复
  241. //log.Println("相同id",info.id)
  242. return false, v, ""
  243. }
  244. //if v.id == "5c761a4fa5cb26b9b73d9512" &&info.id=="5c767bd1a5cb26b9b7a61597" {
  245. // log.Println("测试数据")
  246. //}
  247. if info.subtype == v.subtype {
  248. if info.site != "" {
  249. sitelock.Lock()
  250. dict := SiteMap[info.site]
  251. sitelock.Unlock()
  252. if dict != nil {
  253. if info.area == "全国" && dict["area"] != "" {
  254. info.is_site = true
  255. info.area = qutil.ObjToString(dict["area"])
  256. info.city = qutil.ObjToString(dict["city"])
  257. } else {
  258. if info.city == "" && dict["city"] != "" {
  259. info.is_site = true
  260. info.area = qutil.ObjToString(dict["area"])
  261. info.city = qutil.ObjToString(dict["city"])
  262. }
  263. }
  264. }
  265. }
  266. //前置条件1 - 站点相关
  267. if info.site != "" && info.site == v.site {
  268. if info.href != "" && info.href == v.href {
  269. reason = "href相同"
  270. b = true
  271. source = v
  272. reasons = reason
  273. break L
  274. }
  275. if info.href != "" && info.href != v.href {
  276. reason = "href不同-"
  277. }
  278. }
  279. //前置条件2 - 标题相关,有且一个关键词
  280. if ((info.titleSpecialWord && !v.titleSpecialWord) || (info.specialWord && !v.specialWord)) &&
  281. info.title != v.title && v.title != "" && info.title != "" {
  282. continue
  283. }
  284. //前置条件3 - 标题相关,均含有关键词
  285. if ((info.titleSpecialWord && v.titleSpecialWord) || (info.specialWord && v.specialWord)) &&
  286. len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && v.title != "" && info.title != "" {
  287. letter1,letter2:=v.title,info.title
  288. res, _ := regexp.Compile("[0-9a-zA-Z]+");
  289. if res.MatchString(letter1)||res.MatchString(letter2) {
  290. letter1=convertArabicNumeralsAndLetters(letter1)
  291. letter2=convertArabicNumeralsAndLetters(letter2)
  292. }
  293. if strings.Contains(letter1,"重新招标")|| strings.Contains(letter2,"重新招标"){
  294. letter1,letter2=dealWithSpecialPhrases(letter1,letter2)
  295. }
  296. if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
  297. continue
  298. }else {
  299. reason = reason + "标题关键词且包含关系"
  300. if !againRepeat(v, info) {//继续二级金额判断
  301. b = true
  302. source = v
  303. reasons = reason
  304. break
  305. }
  306. }
  307. }
  308. //新增快速数据过少判重
  309. if LowHeavy {
  310. repeat := false
  311. if repeat, reason = fastLowQualityHeavy(v, info, reason); repeat {
  312. b = true
  313. source = v
  314. reasons = reason
  315. break
  316. }
  317. }
  318. //代理机构相同-非空相等
  319. if v.agency != "" && info.agency != "" && v.agency == info.agency {
  320. reason = reason + "同机构-"
  321. repeat := false
  322. if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
  323. b = true
  324. source = v
  325. reasons = reason
  326. break
  327. }
  328. } else {
  329. reason = reason + "非同机构-"
  330. if info.city != "" && info.city == v.city {
  331. reason = reason + "同城-"
  332. repeat := false
  333. if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
  334. b = true
  335. source = v
  336. reasons = reason
  337. break
  338. }
  339. } else {
  340. reason = reason + "不同城-"
  341. repeat := false
  342. if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
  343. b = true
  344. source = v
  345. reasons = reason
  346. break
  347. }
  348. }
  349. }
  350. }
  351. }
  352. }
  353. }
  354. //往预存数据 d 添加
  355. if !b {
  356. ct := info.comeintime
  357. if Is_Sort ||TimingTask{
  358. ct = info.publishtime
  359. }
  360. dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
  361. k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
  362. d.lock.Lock()
  363. data := d.data[k]
  364. if data == nil {
  365. data = []*Info{info}
  366. d.data[k] = data
  367. if !d.keys[dkey] {
  368. d.keys[dkey] = true
  369. d.update(ct)
  370. }
  371. } else {
  372. data = append(data, info)
  373. d.data[k] = data
  374. }
  375. //添加省
  376. isAreaExist :=false
  377. for _,v:= range d.areakeys {
  378. if v==info.area {
  379. isAreaExist = true
  380. }
  381. }
  382. if !isAreaExist {
  383. areaArr := d.areakeys
  384. areaArr = append(areaArr,info.area)
  385. d.areakeys = areaArr
  386. }
  387. d.lock.Unlock()
  388. }
  389. return
  390. }
  391. //替换原始数据池
  392. func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
  393. ct := newData.comeintime
  394. if Is_Sort||TimingTask {
  395. ct = newData.publishtime
  396. }
  397. dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
  398. k := fmt.Sprintf("%s_%s_%s", dkey, newData.subtype, newData.area)
  399. d.lock.Lock()
  400. data := d.data[k]
  401. if data == nil {
  402. data = []*Info{newData}
  403. d.data[k] = data
  404. if !d.keys[dkey] {
  405. d.keys[dkey] = true
  406. }
  407. } else {
  408. //遍历替换
  409. isReplace := false
  410. for k, v := range data {
  411. if v.id == oldData.id {
  412. data[k] = newData //同天_type_area 替换
  413. isReplace = true
  414. break
  415. }
  416. }
  417. if !isReplace {
  418. //添加新数据 删除老数据
  419. data = append(data,newData)
  420. ct_old := oldData.comeintime
  421. if Is_Sort||TimingTask {
  422. ct_old = oldData.publishtime
  423. }
  424. dkey_old := qutil.FormatDateByInt64(&ct_old, qutil.Date_yyyyMMdd)
  425. k_old := fmt.Sprintf("%s_%s_%s", dkey_old, oldData.subtype, oldData.area)
  426. data_old := d.data[k_old]
  427. if len(data_old)==1 {
  428. delete(d.data ,k_old)
  429. } else {
  430. for k, v := range data_old {
  431. if v.id == oldData.id {
  432. //删除对应当前的
  433. data_old = append(data_old[:k], data_old[k+1:]...)
  434. break
  435. }
  436. }
  437. d.data[k_old] = data_old
  438. }
  439. }else {
  440. d.data[k] = data
  441. }
  442. }
  443. d.lock.Unlock()
  444. }
  445. func (d *datamap) update(t int64) {
  446. //每天0点清除历史数据
  447. d.keymap = d.GetLatelyFiveDay(t)
  448. m := map[string]bool{}
  449. for _, v := range d.keymap {
  450. m[v] = true
  451. }
  452. all, all1 := 0, 0
  453. for k, v := range d.data {
  454. all += len(v)
  455. if !m[k[:8]] {
  456. delete(d.data, k)
  457. }
  458. }
  459. for k, _ := range d.keys {
  460. if !m[k] {
  461. delete(d.keys, k)
  462. }
  463. }
  464. for _, v := range d.data {
  465. all1 += len(v)
  466. }
  467. //log.Println("更新前后数据:", all, all1)
  468. }
  469. func (d *datamap) GetLatelyFiveDay(t int64) []string {
  470. array := make([]string, d.days)
  471. now := time.Unix(t, 0)
  472. for i := 0; i < d.days; i++ {
  473. array[i] = now.Format(qutil.Date_yyyyMMdd)
  474. now = now.AddDate(0, 0, -1)
  475. }
  476. return array
  477. }
/*
 **************************************
 ***** Duplicate-detection rules below *****
 **************************************
 */
  483. //完善判重数据监测-前置条件
  484. func convertArabicNumeralsAndLetters(data string) string {
  485. newData :=data
  486. res1, _ := regexp.Compile("[a-zA-Z]+");
  487. if res1.MatchString(data) {
  488. newData = res1.ReplaceAllStringFunc(data, strings.ToUpper);
  489. }
  490. res2, _ := regexp.Compile("[0-9]+");
  491. if res2.MatchString(newData) {
  492. arr1:=[]string {"0","1","2","3","4","5","6","7","8","9"}
  493. arr2:=[]string {"零","一","二","三","四","五","六","七","八","九"}
  494. for i:=0 ;i<len(arr1) ;i++ {
  495. resTemp ,_:=regexp.Compile(arr1[i])
  496. newData= resTemp.ReplaceAllString(newData, arr2[i]);
  497. }
  498. }
  499. return newData
  500. }
  501. func dealWithSpecialPhrases(str1 string,str2 string) (string,string) {
  502. newStr1:=str1
  503. newStr2:=str2
  504. res, _ := regexp.Compile("重新招标");
  505. if res.MatchString(newStr1) {
  506. newStr1 = res.ReplaceAllString(newStr1,"重招");
  507. }
  508. if res.MatchString(newStr2) {
  509. newStr2 = res.ReplaceAllString(newStr2,"重招");
  510. }
  511. return newStr1,newStr2
  512. }
  513. //快速低质量数据判重
  514. func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
  515. //首先判定是否为低质量数据 info目标数据
  516. if info.agency==v.agency&&info.title!=""&&
  517. info.title==v.title &&
  518. info.projectname==""&&info.projectcode==""&&info.contractnumber==""&&info.buyer=="" {
  519. isValue:=0//五要素判断
  520. if info.budget != 0 {//预算
  521. isValue++
  522. }
  523. if info.bidopentime != 0{//开标时间
  524. isValue++
  525. }
  526. if info.bidopenaddress!=""{//开标地点
  527. isValue++
  528. }
  529. if info.winner != ""{//中标单位
  530. isValue++
  531. }
  532. if info.bidamount != 0 {//中标金额
  533. isValue++
  534. }
  535. if isValue==0 {
  536. //if info.site!=v.site {
  537. // log.Println("符合低质量条件条件0",info.id,"--",v.id)
  538. //}
  539. //log.Println("符合低质量条件条件0",info.id,"--",v.id)
  540. reason = reason + "---要素均为空,标题包含关系"
  541. return true, reason
  542. }else if isValue==1 {
  543. isMeet := false
  544. if isMeet, reason = judgeLowQualityData(v, info, reason); isMeet {
  545. //log.Println("符合低质量条件条件1",info.id,"--",v.id)
  546. reason = reason + "---有且一个要素组合"
  547. return true, reason
  548. }
  549. }else {
  550. }
  551. }
  552. return false,reason
  553. }
  554. //类别细节原因记录
  555. func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
  556. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  557. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  558. info.subtype == "变更" || info.subtype == "其他" {
  559. //招标结果
  560. if info.budget != 0 && info.budget == v.budget{//预算
  561. reason = reason + "---招标类:预算"
  562. return true,reason
  563. }
  564. if info.bidopentime != 0 && info.bidopentime == v.bidopentime{//开标时间
  565. reason = reason + "---招标类:开标时间"
  566. return true,reason
  567. }
  568. if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
  569. reason = reason + "---招标类:开标地点"
  570. return true,reason
  571. }
  572. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  573. //中标结果
  574. if v.winner != "" && info.winner == v.winner{//中标单位
  575. reason = reason + "---中标类:中标单位"
  576. return true,reason
  577. }
  578. if v.bidamount != 0 && info.bidamount == v.bidamount{//中标金额
  579. reason = reason + "---中标类:中标金额"
  580. return true,reason
  581. }
  582. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  583. //合同
  584. if info.budget != 0 && info.budget == v.budget{//预算
  585. reason = reason + "---合同类:预算"
  586. return true,reason
  587. }
  588. if info.bidopentime != 0 && info.bidopentime == v.bidopentime{//开标时间
  589. reason = reason + "---合同类:开标时间"
  590. return true,reason
  591. }
  592. if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
  593. reason = reason + "---合同类:开标地点"
  594. return true,reason
  595. }
  596. if v.winner != "" && info.winner == v.winner{//中标单位
  597. reason = reason + "---合同类:中标单位"
  598. return true,reason
  599. }
  600. if v.bidamount != 0 && info.bidamount == v.bidamount{//中标金额
  601. reason = reason + "---合同类:中标金额"
  602. return true,reason
  603. }
  604. } else {
  605. //招标结果
  606. if info.budget != 0 && info.budget == v.budget{//预算
  607. reason = reason + "---类别空-招标类:预算"
  608. return true,reason
  609. }
  610. if info.bidopentime != 0 && info.bidopentime == v.bidopentime{//开标时间
  611. reason = reason + "---类别空-招标类:开标时间"
  612. return true,reason
  613. }
  614. if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
  615. reason = reason + "---类别空-招标类:开标地点"
  616. return true,reason
  617. }
  618. }
  619. return false,reason
  620. }
  621. //判重方法1
  622. func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
  623. isMeet := false
  624. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  625. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  626. info.subtype == "变更" || info.subtype == "其他" {
  627. //招标结果
  628. if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
  629. if tenderRepeat_C(v, info) {
  630. return false, reason
  631. } else {
  632. reason = reason + "---招标类"
  633. return true, reason
  634. }
  635. } else {
  636. return false, reason
  637. }
  638. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  639. //中标结果
  640. if isMeet, reason = winningRepeat_A(v, info, reason); isMeet {
  641. if winningRepeat_C(v, info) {
  642. return false, reason
  643. } else {
  644. reason = reason + "---中标类"
  645. return true, reason
  646. }
  647. } else {
  648. return false, reason
  649. }
  650. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  651. //合同
  652. if isMeet, reason = contractRepeat_A(v, info, reason); isMeet {
  653. if contractRepeat_C(v, info) {
  654. return false, reason
  655. } else {
  656. reason = reason + "---合同类"
  657. return true, reason
  658. }
  659. } else {
  660. return false, reason
  661. }
  662. } else {
  663. //招标结果
  664. if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
  665. if tenderRepeat_C(v, info) {
  666. return false, reason
  667. } else {
  668. reason = reason + "---类别空-招标类"
  669. return true, reason
  670. }
  671. } else {
  672. return false, reason
  673. }
  674. }
  675. return false, reason
  676. }
  677. //判重方法2
  678. func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
  679. isMeet := false
  680. if v.agency == info.agency && v.agency != "" && info.agency != "" {
  681. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  682. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  683. info.subtype == "变更" || info.subtype == "其他" {
  684. //招标结果
  685. if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
  686. if tenderRepeat_C(v, info) { //有不同
  687. return false, reason
  688. } else {
  689. reason = reason + "---招标类"
  690. return true, reason
  691. }
  692. } else {
  693. return false, reason
  694. }
  695. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  696. //中标结果
  697. if isMeet, reason = winningRepeat_B(v, info, reason); isMeet {
  698. if winningRepeat_C(v, info) { //有不同
  699. return false, reason
  700. } else {
  701. reason = reason + "---中标类"
  702. return true, reason
  703. }
  704. } else {
  705. return false, reason
  706. }
  707. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  708. //合同
  709. if isMeet, reason = contractRepeat_B(v, info, reason); isMeet {
  710. if contractRepeat_C(v, info) { //有不同
  711. return false, reason
  712. } else {
  713. reason = reason + "---合同类"
  714. return true, reason
  715. }
  716. } else {
  717. return false, reason
  718. }
  719. } else {
  720. //招标结果
  721. if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
  722. if tenderRepeat_C(v, info) { //有不同
  723. return false, reason
  724. } else {
  725. reason = reason + "---类别空-招标类"
  726. return true, reason
  727. }
  728. } else {
  729. return false, reason
  730. }
  731. }
  732. }
  733. //不同
  734. if v.agency != info.agency && v.agency != "" && info.agency != "" {
  735. return false, reason
  736. }
  737. //机构最少一个为空
  738. if v.agency == "" || info.agency == "" {
  739. var repeat = false
  740. if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
  741. reason = reason + "---机构最少一个空"
  742. return true, reason
  743. } else {
  744. return false, reason
  745. }
  746. }
  747. return false, reason
  748. }
  749. //招标_A
  750. func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  751. var ss string
  752. p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false
  753. if v.projectname != "" && v.projectname == info.projectname {
  754. ss = ss + "p1(名称)-"
  755. p1 = true
  756. }
  757. if v.buyer != "" && v.buyer == info.buyer {
  758. ss = ss + "p2(单位)-"
  759. p2 = true
  760. }
  761. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  762. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  763. ss = ss + "p3(编号组)-"
  764. p3 = true
  765. }
  766. if v.budget != 0 && v.budget == info.budget {
  767. ss = ss + "p4(预算)-"
  768. p4 = true
  769. }
  770. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  771. ss = ss + "p9(开标时间)-"
  772. p9 = true
  773. }
  774. if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
  775. ss = ss + "p10(开标地点)-"
  776. p10 = true
  777. }
  778. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  779. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  780. ss = ss + "p11(标题)-"
  781. p11 = true
  782. }
  783. if (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) ||
  784. (p1 && p2 && p10) || (p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) ||
  785. (p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) ||
  786. (p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) ||
  787. (p2 && p4 && p9) || (p2 && p4 && p10) || (p2 && p4 && p11) ||
  788. (p3 && p4 && p9) || (p3 && p4 && p10) || (p3 && p4 && p11) ||
  789. (p4 && p9 && p10) || (p4 && p9 && p11) || (p9 && p10 && p11) {
  790. reason = reason + "满足招标A,3要素组合-" + ss + ","
  791. return true, reason
  792. }
  793. return false, reason
  794. }
  795. //招标_B
  796. func tenderRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  797. m, n := 0, 0
  798. if v.projectname != "" && v.projectname == info.projectname {
  799. m++
  800. n++
  801. }
  802. if v.buyer != "" && v.buyer == info.buyer {
  803. m++
  804. }
  805. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  806. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  807. m++
  808. }
  809. if v.budget != 0 && v.budget == info.budget {
  810. m++
  811. }
  812. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  813. m++
  814. }
  815. //if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
  816. // m++
  817. //}
  818. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  819. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  820. m++
  821. n++
  822. }
  823. if m >= 2 {
  824. if n == 2 && m == 2 {
  825. return false, reason
  826. } else {
  827. reason = reason + "满足招标B,六选二,"
  828. return true, reason
  829. }
  830. }
  831. return false, reason
  832. }
  833. //招标_C
  834. func tenderRepeat_C(v *Info, info *Info) bool {
  835. if v.budget != 0 && info.budget != 0 && v.budget != info.budget {
  836. return true
  837. }
  838. //原始地址...
  839. if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
  840. return true
  841. }
  842. //if v.bidopentime != 0 && info.bidopentime != 0 && v.bidopentime != info.bidopentime {
  843. // return true
  844. //}
  845. //if v.bidopenaddress != "" && info.bidopenaddress != "" && v.bidopenaddress != info.bidopenaddress {
  846. // return true
  847. //}
  848. return false
  849. }
  850. //中标_A
  851. func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  852. var ss string
  853. p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false
  854. if v.projectname != "" && v.projectname == info.projectname {
  855. ss = ss + "p1(项目名称)-"
  856. p1 = true
  857. }
  858. if v.buyer != "" && v.buyer == info.buyer {
  859. ss = ss + "p2(单位)-"
  860. p2 = true
  861. }
  862. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  863. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  864. ss = ss + "p3(编号组)-"
  865. p3 = true
  866. }
  867. //if v.bidamount != 0 && v.bidamount == info.bidamount {
  868. // ss = ss + "p5(中标金)-"
  869. // p5 = true
  870. //}
  871. //if v.winner != "" && v.winner == info.winner {
  872. // ss = ss + "p6(中标人)-"
  873. // p6 = true
  874. //}
  875. if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
  876. ss = ss + "p5(中标金)-"
  877. p5 = true
  878. }
  879. if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
  880. ss = ss + "p6(中标人)-"
  881. p6 = true
  882. }
  883. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  884. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  885. ss = ss + "p11(标题)-"
  886. p11 = true
  887. }
  888. if (p1 && p2 && p3) || (p1 && p2 && p5) || (p1 && p2 && p6) ||
  889. (p1 && p3 && p5) || (p1 && p3 && p6) || (p1 && p5 && p6) ||
  890. (p2 && p3 && p5) || (p2 && p3 && p6) || (p2 && p3 && p11) ||
  891. (p2 && p5 && p6) || (p2 && p5 && p11) || (p2 && p6 && p11) ||
  892. (p3 && p5 && p6) || (p3 && p5 && p11) || (p3 && p6 && p11) ||
  893. (p5 && p6 && p11) {
  894. reason = reason + "满足中标A,3要素组合-" + ss + ","
  895. return true, reason
  896. }
  897. return false, reason
  898. }
  899. //中标_B
  900. func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  901. m, n := 0, 0
  902. if v.projectname != "" && v.projectname == info.projectname {
  903. m++
  904. n++
  905. }
  906. if v.buyer != "" && v.buyer == info.buyer {
  907. m++
  908. }
  909. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  910. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  911. m++
  912. }
  913. if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
  914. m++
  915. }
  916. if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
  917. m++
  918. }
  919. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  920. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  921. m++
  922. n++
  923. }
  924. if m >= 2 {
  925. if n == 2 && m == 2 {
  926. return false, reason
  927. } else {
  928. reason = reason + "满足中标B.六选二,"
  929. return true, reason
  930. }
  931. }
  932. return false, reason
  933. }
  934. //中标_C
  935. func winningRepeat_C(v *Info, info *Info) bool {
  936. //if v.bidamount != 0 && info.bidamount != 0 && v.bidamount != info.bidamount {
  937. // return true
  938. //}
  939. //if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount,info.bidamount) {
  940. // return true
  941. //}
  942. //
  943. //if v.winner != "" && info.winner != "" && deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) {
  944. // return true
  945. //}
  946. //原始地址...
  947. if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
  948. return true
  949. }
  950. return false
  951. }
  952. //合同_A
  953. func contractRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  954. isMeet_1 := false
  955. if isMeet_1, reason = tenderRepeat_A(v, info, reason); isMeet_1 {
  956. return true, reason
  957. }
  958. isMeet_2 := false
  959. if isMeet_2, reason = winningRepeat_A(v, info, reason); isMeet_2 {
  960. return true, reason
  961. }
  962. return false, reason
  963. }
  964. //合同_B
  965. func contractRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  966. isMeet_1 := false
  967. if isMeet_1, reason = tenderRepeat_B(v, info, reason); isMeet_1 {
  968. return true, reason
  969. }
  970. isMeet_2 := false
  971. if isMeet_2, reason = winningRepeat_B(v, info, reason); isMeet_2 {
  972. return true, reason
  973. }
  974. return false, reason
  975. }
  976. //合同_C
  977. func contractRepeat_C(v *Info, info *Info) bool {
  978. if tenderRepeat_C(v, info) {
  979. return true
  980. }
  981. if winningRepeat_C(v, info) {
  982. return true
  983. }
  984. //合同类 - 新增编号
  985. if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
  986. return true
  987. }
  988. if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
  989. return true
  990. }
  991. return false
  992. }
  993. //再次金额判断
  994. func againRepeat(v *Info, info *Info) bool {
  995. //相同采购单位下
  996. //if info.buyer != "" && v.buyer == info.buyer {
  997. if v.buyer == info.buyer {
  998. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  999. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  1000. info.subtype == "其他" || info.subtype == "变更" {
  1001. //预算金额满足条件
  1002. if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
  1003. return true
  1004. }
  1005. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" ||
  1006. info.subtype == "流标" || info.subtype == "合同" || info.subtype == "验收" ||
  1007. info.subtype == "违规" {
  1008. //中标金额单位满足条件
  1009. if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
  1010. (deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
  1011. return true
  1012. }
  1013. } else {
  1014. }
  1015. }
  1016. return false
  1017. }
  // extraSpaceRun matches a run of two or more whitespace characters. It is
  // compiled once at package scope instead of on every deleteExtraSpace call.
  var extraSpaceRun = regexp.MustCompile(`\s{2,}`)

  // deleteExtraSpace normalises whitespace in a winner name so that two names
  // differing only in spacing compare equal.
  //
  // Every tab is first replaced by a space; then each run of two or more
  // whitespace characters is collapsed to the first character of that run.
  // Single whitespace characters, including leading and trailing ones, are
  // kept, and an empty string is returned as is.
  func deleteExtraSpace(s string) string {
  	s = strings.Replace(s, "\t", " ", -1)
  	// Go's \s only matches ASCII whitespace, so the first character of a run
  	// is always exactly one byte.
  	return extraSpaceRun.ReplaceAllStringFunc(s, func(run string) string {
  		return run[:1]
  	})
  }
  // isBidWinningAmount reports whether two bid amounts DIFFER. Despite the
  // name, true means "not the same amount".
  //
  // Two amounts are treated as the same (result false) when they are equal or
  // when one equals the other multiplied by 10000. NOTE(review): the 10000
  // factor presumably covers one side being recorded in units of ten thousand
  // - confirm against the data source.
  //
  // The comparisons use a tight relative tolerance rather than ==, because
  // multiplying a float64 by 10000 can introduce rounding error that would make
  // equal amounts look different.
  func isBidWinningAmount(f1 float64, f2 float64) bool {
  	// About 450 ulps: far above rounding noise from a few float operations,
  	// far below any real difference between monetary amounts.
  	const relTol = 1e-13

  	abs := func(x float64) float64 {
  		if x < 0 {
  			return -x
  		}
  		return x
  	}
  	same := func(a, b float64) bool {
  		if a == b {
  			return true
  		}
  		diff := abs(a - b)
  		limit := abs(a)
  		if m := abs(b); m > limit {
  			limit = m
  		}
  		// diff < limit rejects an infinite operand (Inf <= Inf*relTol would
  		// otherwise hold); NaN fails both comparisons.
  		return diff < limit && diff <= relTol*limit
  	}

  	return !(same(f1, f2) || same(f1*10000, f2) || same(f2*10000, f1))
  }