dataMethodHeavy.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. package main
  2. import (
  3. "strings"
  4. )
  5. //判重方法1
  6. func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
  7. isMeet := false
  8. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  9. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  10. info.subtype == "变更" || info.subtype == "其他" {
  11. //招标结果
  12. if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
  13. if tenderRepeat_C(v, info) {
  14. return false, reason
  15. } else {
  16. reason = reason + "---招标类"
  17. return true, reason
  18. }
  19. } else {
  20. return false, reason
  21. }
  22. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  23. //中标结果
  24. if isMeet, reason = winningRepeat_A(v, info, reason); isMeet {
  25. if winningRepeat_C(v, info) {
  26. return false, reason
  27. } else {
  28. reason = reason + "---中标类"
  29. return true, reason
  30. }
  31. } else {
  32. return false, reason
  33. }
  34. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  35. //合同
  36. if isMeet, reason = contractRepeat_A(v, info, reason); isMeet {
  37. if contractRepeat_C(v, info) {
  38. return false, reason
  39. } else {
  40. reason = reason + "---合同类"
  41. return true, reason
  42. }
  43. } else {
  44. return false, reason
  45. }
  46. } else {
  47. //招标结果
  48. if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
  49. if tenderRepeat_C(v, info) {
  50. return false, reason
  51. } else {
  52. reason = reason + "---类别空-招标类"
  53. return true, reason
  54. }
  55. } else {
  56. return false, reason
  57. }
  58. }
  59. return false, reason
  60. }
  61. //判重方法2
  62. func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
  63. isMeet := false
  64. isAgency :=false
  65. //招标类-代理机构不同-广泛前后缀比较
  66. if v.agency != info.agency && v.agency != "" && info.agency != "" {
  67. //新增一层判断
  68. if strings.Contains(v.agency, info.agency) || strings.Contains(info.agency, v.agency) {
  69. isAgency = true
  70. }else {
  71. return false, reason
  72. }
  73. }
  74. if (v.agency == info.agency && v.agency != "" && info.agency != "")|| isAgency {
  75. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  76. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  77. info.subtype == "变更" || info.subtype == "其他" {
  78. //招标结果
  79. if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
  80. if tenderRepeat_C(v, info) { //有不同
  81. return false, reason
  82. } else {
  83. reason = reason + "---招标类"
  84. return true, reason
  85. }
  86. } else {
  87. return false, reason
  88. }
  89. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  90. //中标结果
  91. if isMeet, reason = winningRepeat_B(v, info, reason); isMeet {
  92. if winningRepeat_C(v, info) { //有不同
  93. return false, reason
  94. } else {
  95. reason = reason + "---中标类"
  96. return true, reason
  97. }
  98. } else {
  99. return false, reason
  100. }
  101. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  102. //合同
  103. if isMeet, reason = contractRepeat_B(v, info, reason); isMeet {
  104. if contractRepeat_C(v, info) { //有不同
  105. return false, reason
  106. } else {
  107. reason = reason + "---合同类"
  108. return true, reason
  109. }
  110. } else {
  111. return false, reason
  112. }
  113. } else {
  114. //招标结果
  115. if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
  116. if tenderRepeat_C(v, info) { //有不同
  117. return false, reason
  118. } else {
  119. reason = reason + "---类别空-招标类"
  120. return true, reason
  121. }
  122. } else {
  123. return false, reason
  124. }
  125. }
  126. }
  127. //机构最2少一个为空
  128. if v.agency == "" || info.agency == "" {
  129. var repeat = false
  130. if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
  131. reason = reason + "---机构最少一个空"
  132. return true, reason
  133. } else {
  134. return false, reason
  135. }
  136. }
  137. return false, reason
  138. }
  139. //招标_A
  140. func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  141. var ss string
  142. p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false
  143. if v.projectname != "" && v.projectname == info.projectname {
  144. ss = ss + "p1-名称-"
  145. p1 = true
  146. }
  147. if v.buyer != "" && v.buyer == info.buyer {
  148. ss = ss + "p2-单位-"
  149. p2 = true
  150. }
  151. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  152. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  153. ss = ss + "p3-编号组-"
  154. p3 = true
  155. }
  156. if v.budget != 0 && v.budget == info.budget {
  157. ss = ss + "p4-预算-"
  158. p4 = true
  159. }
  160. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  161. ss = ss + "p9-开标时间相同-"
  162. p9 = true
  163. }
  164. if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
  165. ss = ss + "p10-开标地点-"
  166. p10 = true
  167. }
  168. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  169. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  170. ss = ss + "p11-标题-"
  171. p11 = true
  172. }
  173. if info.subtype !=""&&(p1 && p3 && p11) {
  174. reason = reason + "满足招标A,3要素组合-" + ss + ","
  175. return true, reason
  176. }
  177. if (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) ||
  178. (p1 && p2 && p10) || (p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) ||
  179. (p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) ||
  180. (p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) ||
  181. (p2 && p4 && p9) || (p2 && p4 && p10) || (p2 && p4 && p11) ||
  182. (p3 && p4 && p9) || (p3 && p4 && p10) || (p3 && p4 && p11) ||
  183. (p4 && p9 && p10) || (p4 && p9 && p11) || (p9 && p10 && p11) {
  184. reason = reason + "满足招标A,3要素组合-" + ss + ","
  185. return true, reason
  186. }
  187. return false, reason
  188. }
  189. //招标_B
  190. func tenderRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  191. m, n := 0, 0
  192. if v.projectname != "" && v.projectname == info.projectname {
  193. m++
  194. n++
  195. }
  196. if v.buyer != "" && v.buyer == info.buyer {
  197. m++
  198. }
  199. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  200. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  201. m++
  202. }
  203. if v.budget != 0 && v.budget == info.budget {
  204. m++
  205. }
  206. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  207. m++
  208. }
  209. //if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
  210. // m++
  211. //}
  212. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  213. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  214. m++
  215. n++
  216. }
  217. if m >= 2 {
  218. if n == 2 && m == 2 {
  219. return false, reason
  220. } else {
  221. reason = reason + "满足招标B,六选二,"
  222. return true, reason
  223. }
  224. }
  225. return false, reason
  226. }
  227. //招标_C
  228. func tenderRepeat_C(v *Info, info *Info) bool {
  229. if v.budget != 0 && info.budget != 0 && v.budget != info.budget {
  230. return true
  231. }
  232. if v.bidopentime != 0 && info.bidopentime != 0 && isBidopentimeInterval(info.bidopentime,v.bidopentime) {
  233. return true
  234. }
  235. return false
  236. }
  237. //中标_A
  238. func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  239. var ss string
  240. p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false
  241. if v.projectname != "" && v.projectname == info.projectname {
  242. ss = ss + "p1-项目名称-"
  243. p1 = true
  244. }
  245. if v.buyer != "" && v.buyer == info.buyer {
  246. ss = ss + "p2-单位-"
  247. p2 = true
  248. }
  249. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  250. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  251. ss = ss + "p3-编号组--"
  252. p3 = true
  253. }
  254. if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
  255. ss = ss + "p5-中标金-"
  256. p5 = true
  257. }
  258. if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
  259. ss = ss + "p6-中标人-"
  260. p6 = true
  261. }
  262. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  263. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  264. ss = ss + "p11-标题-"
  265. p11 = true
  266. }
  267. if (p1 && p2 && p3) || (p1 && p2 && p5) || (p1 && p2 && p6) ||
  268. (p1 && p2 && p11)|| (p1 && p3 && p11)||
  269. (p1 && p3 && p5) || (p1 && p3 && p6) || (p1 && p5 && p6) ||
  270. (p2 && p3 && p5) || (p2 && p3 && p6) || (p2 && p3 && p11) ||
  271. (p2 && p5 && p6) || (p2 && p5 && p11) || (p2 && p6 && p11) ||
  272. (p3 && p5 && p6) || (p3 && p5 && p11) || (p3 && p6 && p11) ||
  273. (p5 && p6 && p11) {
  274. reason = reason + "满足中标A,3要素组合-" + ss + ","
  275. return true, reason
  276. }
  277. return false, reason
  278. }
  279. //中标_B
  280. func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  281. m, n := 0, 0
  282. if v.projectname != "" && v.projectname == info.projectname {
  283. m++
  284. n++
  285. }
  286. if v.buyer != "" && v.buyer == info.buyer {
  287. m++
  288. }
  289. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  290. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  291. m++
  292. }
  293. if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
  294. m++
  295. }
  296. if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
  297. m++
  298. }
  299. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  300. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  301. m++
  302. n++
  303. }
  304. if m >= 2 {
  305. if n == 2 && m == 2 {
  306. return false, reason
  307. } else {
  308. reason = reason + "满足中标B.六选二,"
  309. return true, reason
  310. }
  311. }
  312. return false, reason
  313. }
  314. //中标_C
  315. func winningRepeat_C(v *Info, info *Info) bool {
  316. if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount,info.bidamount) {
  317. //避免抽错金额-
  318. if ((v.projectcode!=""&&info.projectcode!=""&&v.projectcode==info.projectcode)||
  319. (v.contractnumber!=""&&info.contractnumber!=""&&v.contractnumber==info.contractnumber)) &&
  320. (v.winner!=""&&info.winner!=""&&v.winner==info.winner) {
  321. return false
  322. }
  323. return true
  324. }
  325. if v.winner != "" && info.winner != "" && deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) {
  326. return true
  327. }
  328. return false
  329. }
  330. //合同_A
  331. func contractRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  332. isMeet_1 := false
  333. if isMeet_1, reason = tenderRepeat_A(v, info, reason); isMeet_1 {
  334. return true, reason
  335. }
  336. isMeet_2 := false
  337. if isMeet_2, reason = winningRepeat_A(v, info, reason); isMeet_2 {
  338. return true, reason
  339. }
  340. return false, reason
  341. }
  342. //合同_B
  343. func contractRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  344. isMeet_1 := false
  345. if isMeet_1, reason = tenderRepeat_B(v, info, reason); isMeet_1 {
  346. return true, reason
  347. }
  348. isMeet_2 := false
  349. if isMeet_2, reason = winningRepeat_B(v, info, reason); isMeet_2 {
  350. return true, reason
  351. }
  352. return false, reason
  353. }
  354. //合同_C
  355. func contractRepeat_C(v *Info, info *Info) bool {
  356. if tenderRepeat_C(v, info) {
  357. return true
  358. }
  359. if winningRepeat_C(v, info) {
  360. return true
  361. }
  362. //合同类 - 新增编号
  363. if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
  364. return true
  365. }
  366. if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
  367. return true
  368. }
  369. return false
  370. }
  371. //快速低质量数据判重
  372. func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
  373. //if !isTheSameDay(v.publishtime,info.publishtime) {
  374. // return false,reason
  375. //}
  376. //区间间隔24小时
  377. if !isTimeIntervalPeriod(v.publishtime,info.publishtime) {
  378. return false,reason
  379. }
  380. //首先判定是否为低质量数据 info目标数据
  381. if info.title!=""&&(info.agency==""||v.agency=="")&&
  382. info.title==v.title&&info.projectcode==""&&info.contractnumber==""&&info.buyer=="" {
  383. isValue:=0//五要素判断
  384. if info.projectname != "" {//项目名称
  385. isValue++
  386. }
  387. if info.budget != 0 {//预算
  388. isValue++
  389. }
  390. if info.winner != ""{//中标单位
  391. isValue++
  392. }
  393. if info.bidamount != 0 {//中标金额
  394. isValue++
  395. }
  396. if isValue==0 {
  397. reason = reason + "---低质量-要素均为空-标题满足"
  398. return true, reason
  399. }else if isValue==1 {
  400. isMeet := false
  401. if isMeet, reason = judgeLowQualityData(v, info, reason); isMeet {
  402. reason = reason + "---低质量-有且一个要素组合"
  403. return true, reason
  404. }
  405. }else {
  406. }
  407. }
  408. return false,reason
  409. }
  410. //类别细节原因记录
  411. func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
  412. if info.projectname!="" && info.projectname == v.projectname{//项目名称
  413. reason = reason + "---项目名称"
  414. return true,reason
  415. }
  416. if info.budget != 0 && info.budget == v.budget{//预算
  417. reason = reason + "---预算"
  418. return true,reason
  419. }
  420. if v.winner != "" && info.winner == v.winner{//中标单位
  421. reason = reason + "---中标单位"
  422. return true,reason
  423. }
  424. if v.bidamount != 0 && info.bidamount == v.bidamount{//中标金额
  425. reason = reason + "---中标金额"
  426. return true,reason
  427. }
  428. return false,reason
  429. }