// dataMethodHeavy.go (13 KB)

  1. package main
  2. import "strings"
  3. //判重方法1
  4. func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
  5. isMeet := false
  6. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  7. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  8. info.subtype == "变更" || info.subtype == "其他" {
  9. //招标结果
  10. if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
  11. if tenderRepeat_C(v, info) {
  12. return false, reason
  13. } else {
  14. reason = reason + "---招标类"
  15. return true, reason
  16. }
  17. } else {
  18. return false, reason
  19. }
  20. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  21. //中标结果
  22. if isMeet, reason = winningRepeat_A(v, info, reason); isMeet {
  23. if winningRepeat_C(v, info) {
  24. return false, reason
  25. } else {
  26. reason = reason + "---中标类"
  27. return true, reason
  28. }
  29. } else {
  30. return false, reason
  31. }
  32. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  33. //合同
  34. if isMeet, reason = contractRepeat_A(v, info, reason); isMeet {
  35. if contractRepeat_C(v, info) {
  36. return false, reason
  37. } else {
  38. reason = reason + "---合同类"
  39. return true, reason
  40. }
  41. } else {
  42. return false, reason
  43. }
  44. } else {
  45. //招标结果
  46. if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
  47. if tenderRepeat_C(v, info) {
  48. return false, reason
  49. } else {
  50. reason = reason + "---类别空-招标类"
  51. return true, reason
  52. }
  53. } else {
  54. return false, reason
  55. }
  56. }
  57. return false, reason
  58. }
  59. //判重方法2
  60. func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
  61. isMeet := false
  62. isAgency :=false
  63. //招标类-代理机构不同-广泛前后缀比较
  64. if v.agency != info.agency && v.agency != "" && info.agency != "" {
  65. //新增一层判断
  66. if strings.Contains(v.agency, info.agency) || strings.Contains(info.agency, v.agency) {
  67. isAgency = true
  68. }else {
  69. return false, reason
  70. }
  71. }
  72. if (v.agency == info.agency && v.agency != "" && info.agency != "")|| isAgency {
  73. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  74. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  75. info.subtype == "变更" || info.subtype == "其他" {
  76. //招标结果
  77. if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
  78. if tenderRepeat_C(v, info) { //有不同
  79. return false, reason
  80. } else {
  81. reason = reason + "---招标类"
  82. return true, reason
  83. }
  84. } else {
  85. return false, reason
  86. }
  87. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  88. //中标结果
  89. if isMeet, reason = winningRepeat_B(v, info, reason); isMeet {
  90. if winningRepeat_C(v, info) { //有不同
  91. return false, reason
  92. } else {
  93. reason = reason + "---中标类"
  94. return true, reason
  95. }
  96. } else {
  97. return false, reason
  98. }
  99. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  100. //合同
  101. if isMeet, reason = contractRepeat_B(v, info, reason); isMeet {
  102. if contractRepeat_C(v, info) { //有不同
  103. return false, reason
  104. } else {
  105. reason = reason + "---合同类"
  106. return true, reason
  107. }
  108. } else {
  109. return false, reason
  110. }
  111. } else {
  112. //招标结果
  113. if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
  114. if tenderRepeat_C(v, info) { //有不同
  115. return false, reason
  116. } else {
  117. reason = reason + "---类别空-招标类"
  118. return true, reason
  119. }
  120. } else {
  121. return false, reason
  122. }
  123. }
  124. }
  125. //机构最2少一个为空
  126. if v.agency == "" || info.agency == "" {
  127. var repeat = false
  128. if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
  129. reason = reason + "---机构最少一个空"
  130. return true, reason
  131. } else {
  132. return false, reason
  133. }
  134. }
  135. return false, reason
  136. }
  137. //招标_A
  138. func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  139. var ss string
  140. p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false
  141. if v.projectname != "" && v.projectname == info.projectname {
  142. ss = ss + "p1-名称-"
  143. p1 = true
  144. }
  145. if v.buyer != "" && v.buyer == info.buyer {
  146. ss = ss + "p2-单位-"
  147. p2 = true
  148. }
  149. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  150. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  151. ss = ss + "p3-编号组-"
  152. p3 = true
  153. }
  154. if v.budget != 0 && v.budget == info.budget {
  155. ss = ss + "p4-预算-"
  156. p4 = true
  157. }
  158. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  159. ss = ss + "p9-开标时间相同-"
  160. p9 = true
  161. }
  162. if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
  163. ss = ss + "p10-开标地点-"
  164. p10 = true
  165. }
  166. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  167. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  168. ss = ss + "p11-标题-"
  169. p11 = true
  170. }
  171. if info.subtype !=""&&(p1 && p3 && p11) {
  172. reason = reason + "满足招标A,3要素组合-" + ss + ","
  173. return true, reason
  174. }
  175. if (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) ||
  176. (p1 && p2 && p10) || (p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) ||
  177. (p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) ||
  178. (p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) ||
  179. (p2 && p4 && p9) || (p2 && p4 && p10) || (p2 && p4 && p11) ||
  180. (p3 && p4 && p9) || (p3 && p4 && p10) || (p3 && p4 && p11) ||
  181. (p4 && p9 && p10) || (p4 && p9 && p11) || (p9 && p10 && p11) {
  182. reason = reason + "满足招标A,3要素组合-" + ss + ","
  183. return true, reason
  184. }
  185. return false, reason
  186. }
  187. //招标_B
  188. func tenderRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  189. m, n := 0, 0
  190. if v.projectname != "" && v.projectname == info.projectname {
  191. m++
  192. n++
  193. }
  194. if v.buyer != "" && v.buyer == info.buyer {
  195. m++
  196. }
  197. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  198. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  199. m++
  200. }
  201. if v.budget != 0 && v.budget == info.budget {
  202. m++
  203. }
  204. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  205. m++
  206. }
  207. //if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
  208. // m++
  209. //}
  210. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  211. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  212. m++
  213. n++
  214. }
  215. if m >= 2 {
  216. if n == 2 && m == 2 {
  217. return false, reason
  218. } else {
  219. reason = reason + "满足招标B,六选二,"
  220. return true, reason
  221. }
  222. }
  223. return false, reason
  224. }
  225. //招标_C
  226. func tenderRepeat_C(v *Info, info *Info) bool {
  227. if v.budget != 0 && info.budget != 0 && v.budget != info.budget {
  228. return true
  229. }
  230. if v.bidopentime != 0 && info.bidopentime != 0 && isBidopentimeInterval(info.bidopentime,v.bidopentime) {
  231. return true
  232. }
  233. return false
  234. }
  235. //中标_A
  236. func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  237. var ss string
  238. p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false
  239. if v.projectname != "" && v.projectname == info.projectname {
  240. ss = ss + "p1-项目名称-"
  241. p1 = true
  242. }
  243. if v.buyer != "" && v.buyer == info.buyer {
  244. ss = ss + "p2-单位-"
  245. p2 = true
  246. }
  247. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  248. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  249. ss = ss + "p3-编号组--"
  250. p3 = true
  251. }
  252. if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
  253. ss = ss + "p5-中标金-"
  254. p5 = true
  255. }
  256. if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
  257. ss = ss + "p6-中标人-"
  258. p6 = true
  259. }
  260. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  261. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  262. ss = ss + "p11-标题-"
  263. p11 = true
  264. }
  265. if (p1 && p2 && p3) || (p1 && p2 && p5) || (p1 && p2 && p6) ||
  266. (p1 && p2 && p11)|| (p1 && p3 && p11)||
  267. (p1 && p3 && p5) || (p1 && p3 && p6) || (p1 && p5 && p6) ||
  268. (p2 && p3 && p5) || (p2 && p3 && p6) || (p2 && p3 && p11) ||
  269. (p2 && p5 && p6) || (p2 && p5 && p11) || (p2 && p6 && p11) ||
  270. (p3 && p5 && p6) || (p3 && p5 && p11) || (p3 && p6 && p11) ||
  271. (p5 && p6 && p11) {
  272. reason = reason + "满足中标A,3要素组合-" + ss + ","
  273. return true, reason
  274. }
  275. return false, reason
  276. }
  277. //中标_B
  278. func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  279. m, n := 0, 0
  280. if v.projectname != "" && v.projectname == info.projectname {
  281. m++
  282. n++
  283. }
  284. if v.buyer != "" && v.buyer == info.buyer {
  285. m++
  286. }
  287. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  288. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  289. m++
  290. }
  291. if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
  292. m++
  293. }
  294. if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
  295. m++
  296. }
  297. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  298. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  299. m++
  300. n++
  301. }
  302. if m >= 2 {
  303. if n == 2 && m == 2 {
  304. return false, reason
  305. } else {
  306. reason = reason + "满足中标B.六选二,"
  307. return true, reason
  308. }
  309. }
  310. return false, reason
  311. }
  312. //中标_C
  313. func winningRepeat_C(v *Info, info *Info) bool {
  314. if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount,info.bidamount) {
  315. //避免抽错金额-
  316. if ((v.projectcode!=""&&info.projectcode!=""&&v.projectcode==info.projectcode)||
  317. (v.contractnumber!=""&&info.contractnumber!=""&&v.contractnumber==info.contractnumber)) &&
  318. (v.winner!=""&&info.winner!=""&&v.winner==info.winner) {
  319. return false
  320. }
  321. return true
  322. }
  323. if v.winner != "" && info.winner != "" && deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) {
  324. return true
  325. }
  326. return false
  327. }
  328. //合同_A
  329. func contractRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  330. isMeet_1 := false
  331. if isMeet_1, reason = tenderRepeat_A(v, info, reason); isMeet_1 {
  332. return true, reason
  333. }
  334. isMeet_2 := false
  335. if isMeet_2, reason = winningRepeat_A(v, info, reason); isMeet_2 {
  336. return true, reason
  337. }
  338. return false, reason
  339. }
  340. //合同_B
  341. func contractRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  342. isMeet_1 := false
  343. if isMeet_1, reason = tenderRepeat_B(v, info, reason); isMeet_1 {
  344. return true, reason
  345. }
  346. isMeet_2 := false
  347. if isMeet_2, reason = winningRepeat_B(v, info, reason); isMeet_2 {
  348. return true, reason
  349. }
  350. return false, reason
  351. }
  352. //合同_C
  353. func contractRepeat_C(v *Info, info *Info) bool {
  354. if tenderRepeat_C(v, info) {
  355. return true
  356. }
  357. if winningRepeat_C(v, info) {
  358. return true
  359. }
  360. //合同类 - 新增编号
  361. if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
  362. return true
  363. }
  364. if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
  365. return true
  366. }
  367. return false
  368. }
  369. //快速低质量数据判重
  370. func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
  371. //if !isTheSameDay(v.publishtime,info.publishtime) {
  372. // return false,reason
  373. //}
  374. //区间间隔24小时
  375. if !isTimeIntervalPeriod(v.publishtime,info.publishtime) {
  376. return false,reason
  377. }
  378. //首先判定是否为低质量数据 info目标数据
  379. if info.title!=""&&(info.agency==""||v.agency=="")&&
  380. info.title==v.title&&info.projectcode==""&&info.contractnumber==""&&info.buyer=="" {
  381. isValue:=0//五要素判断
  382. if info.projectname != "" {//项目名称
  383. isValue++
  384. }
  385. if info.budget != 0 {//预算
  386. isValue++
  387. }
  388. if info.winner != ""{//中标单位
  389. isValue++
  390. }
  391. if info.bidamount != 0 {//中标金额
  392. isValue++
  393. }
  394. if isValue==0 {
  395. reason = reason + "---低质量-要素均为空-标题满足"
  396. return true, reason
  397. }else if isValue==1 {
  398. isMeet := false
  399. if isMeet, reason = judgeLowQualityData(v, info, reason); isMeet {
  400. reason = reason + "---低质量-有且一个要素组合"
  401. return true, reason
  402. }
  403. }else {
  404. }
  405. }
  406. return false,reason
  407. }
  408. //类别细节原因记录
  409. func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
  410. if info.projectname!="" && info.projectname == v.projectname{//项目名称
  411. reason = reason + "---项目名称"
  412. return true,reason
  413. }
  414. if info.budget != 0 && info.budget == v.budget{//预算
  415. reason = reason + "---预算"
  416. return true,reason
  417. }
  418. if v.winner != "" && info.winner == v.winner{//中标单位
  419. reason = reason + "---中标单位"
  420. return true,reason
  421. }
  422. if v.bidamount != 0 && info.bidamount == v.bidamount{//中标金额
  423. reason = reason + "---中标金额"
  424. return true,reason
  425. }
  426. return false,reason
  427. }