// dataMethodHeavy.go

  1. package main
  2. import "strings"
  3. //判重方法1
  4. func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
  5. isMeet := false
  6. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  7. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  8. info.subtype == "变更" || info.subtype == "其他" {
  9. //招标结果
  10. if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
  11. if tenderRepeat_C(v, info) {
  12. return false, reason
  13. } else {
  14. reason = reason + "---招标类"
  15. return true, reason
  16. }
  17. } else {
  18. return false, reason
  19. }
  20. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  21. //中标结果
  22. if isMeet, reason = winningRepeat_A(v, info, reason); isMeet {
  23. if winningRepeat_C(v, info) {
  24. return false, reason
  25. } else {
  26. reason = reason + "---中标类"
  27. return true, reason
  28. }
  29. } else {
  30. return false, reason
  31. }
  32. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  33. //合同
  34. if isMeet, reason = contractRepeat_A(v, info, reason); isMeet {
  35. if contractRepeat_C(v, info) {
  36. return false, reason
  37. } else {
  38. reason = reason + "---合同类"
  39. return true, reason
  40. }
  41. } else {
  42. return false, reason
  43. }
  44. } else {
  45. //招标结果
  46. if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
  47. if tenderRepeat_C(v, info) {
  48. return false, reason
  49. } else {
  50. reason = reason + "---类别空-招标类"
  51. return true, reason
  52. }
  53. } else {
  54. return false, reason
  55. }
  56. }
  57. return false, reason
  58. }
  59. //判重方法2
  60. func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
  61. isMeet := false
  62. if v.agency == info.agency && v.agency != "" && info.agency != "" {
  63. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  64. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  65. info.subtype == "变更" || info.subtype == "其他" {
  66. //招标结果
  67. if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
  68. if tenderRepeat_C(v, info) { //有不同
  69. return false, reason
  70. } else {
  71. reason = reason + "---招标类"
  72. return true, reason
  73. }
  74. } else {
  75. return false, reason
  76. }
  77. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  78. //中标结果
  79. if isMeet, reason = winningRepeat_B(v, info, reason); isMeet {
  80. if winningRepeat_C(v, info) { //有不同
  81. return false, reason
  82. } else {
  83. reason = reason + "---中标类"
  84. return true, reason
  85. }
  86. } else {
  87. return false, reason
  88. }
  89. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  90. //合同
  91. if isMeet, reason = contractRepeat_B(v, info, reason); isMeet {
  92. if contractRepeat_C(v, info) { //有不同
  93. return false, reason
  94. } else {
  95. reason = reason + "---合同类"
  96. return true, reason
  97. }
  98. } else {
  99. return false, reason
  100. }
  101. } else {
  102. //招标结果
  103. if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
  104. if tenderRepeat_C(v, info) { //有不同
  105. return false, reason
  106. } else {
  107. reason = reason + "---类别空-招标类"
  108. return true, reason
  109. }
  110. } else {
  111. return false, reason
  112. }
  113. }
  114. }
  115. //不同
  116. if v.agency != info.agency && v.agency != "" && info.agency != "" {
  117. return false, reason
  118. }
  119. //机构最少一个为空
  120. if v.agency == "" || info.agency == "" {
  121. var repeat = false
  122. if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
  123. reason = reason + "---机构最少一个空"
  124. return true, reason
  125. } else {
  126. return false, reason
  127. }
  128. }
  129. return false, reason
  130. }
  131. //招标_A
  132. func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  133. var ss string
  134. p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false
  135. if v.projectname != "" && v.projectname == info.projectname {
  136. ss = ss + "p1-名称-"
  137. p1 = true
  138. }
  139. if v.buyer != "" && v.buyer == info.buyer {
  140. ss = ss + "p2-单位-"
  141. p2 = true
  142. }
  143. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  144. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  145. ss = ss + "p3-编号组-"
  146. p3 = true
  147. }
  148. if v.budget != 0 && v.budget == info.budget {
  149. ss = ss + "p4-预算-"
  150. p4 = true
  151. }
  152. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  153. ss = ss + "p9-开标时间相同-"
  154. p9 = true
  155. }
  156. if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
  157. ss = ss + "p10-开标地点-"
  158. p10 = true
  159. }
  160. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  161. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  162. ss = ss + "p11-标题-"
  163. p11 = true
  164. }
  165. if info.subtype !=""&&(p1 && p3 && p11) {
  166. reason = reason + "满足招标A,3要素组合-" + ss + ","
  167. return true, reason
  168. }
  169. if (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) ||
  170. (p1 && p2 && p10) || (p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) ||
  171. (p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) ||
  172. (p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) ||
  173. (p2 && p4 && p9) || (p2 && p4 && p10) || (p2 && p4 && p11) ||
  174. (p3 && p4 && p9) || (p3 && p4 && p10) || (p3 && p4 && p11) ||
  175. (p4 && p9 && p10) || (p4 && p9 && p11) || (p9 && p10 && p11) {
  176. reason = reason + "满足招标A,3要素组合-" + ss + ","
  177. return true, reason
  178. }
  179. return false, reason
  180. }
  181. //招标_B
  182. func tenderRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  183. m, n := 0, 0
  184. if v.projectname != "" && v.projectname == info.projectname {
  185. m++
  186. n++
  187. }
  188. if v.buyer != "" && v.buyer == info.buyer {
  189. m++
  190. }
  191. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  192. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  193. m++
  194. }
  195. if v.budget != 0 && v.budget == info.budget {
  196. m++
  197. }
  198. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  199. m++
  200. }
  201. //if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
  202. // m++
  203. //}
  204. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  205. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  206. m++
  207. n++
  208. }
  209. if m >= 2 {
  210. if n == 2 && m == 2 {
  211. return false, reason
  212. } else {
  213. reason = reason + "满足招标B,六选二,"
  214. return true, reason
  215. }
  216. }
  217. return false, reason
  218. }
  219. //招标_C
  220. func tenderRepeat_C(v *Info, info *Info) bool {
  221. if v.budget != 0 && info.budget != 0 && v.budget != info.budget {
  222. return true
  223. }
  224. if v.bidopentime != 0 && info.bidopentime != 0 && isBidopentimeInterval(info.bidopentime,v.bidopentime) {
  225. return true
  226. }
  227. return false
  228. }
  229. //中标_A
  230. func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  231. var ss string
  232. p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false
  233. if v.projectname != "" && v.projectname == info.projectname {
  234. ss = ss + "p1-项目名称-"
  235. p1 = true
  236. }
  237. if v.buyer != "" && v.buyer == info.buyer {
  238. ss = ss + "p2-单位-"
  239. p2 = true
  240. }
  241. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  242. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  243. ss = ss + "p3-编号组--"
  244. p3 = true
  245. }
  246. if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
  247. ss = ss + "p5-中标金-"
  248. p5 = true
  249. }
  250. if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
  251. ss = ss + "p6-中标人-"
  252. p6 = true
  253. }
  254. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  255. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  256. ss = ss + "p11-标题-"
  257. p11 = true
  258. }
  259. if (p1 && p2 && p3) || (p1 && p2 && p5) || (p1 && p2 && p6) ||
  260. (p1 && p2 && p11)|| (p1 && p3 && p11)||
  261. (p1 && p3 && p5) || (p1 && p3 && p6) || (p1 && p5 && p6) ||
  262. (p2 && p3 && p5) || (p2 && p3 && p6) || (p2 && p3 && p11) ||
  263. (p2 && p5 && p6) || (p2 && p5 && p11) || (p2 && p6 && p11) ||
  264. (p3 && p5 && p6) || (p3 && p5 && p11) || (p3 && p6 && p11) ||
  265. (p5 && p6 && p11) {
  266. reason = reason + "满足中标A,3要素组合-" + ss + ","
  267. return true, reason
  268. }
  269. return false, reason
  270. }
  271. //中标_B
  272. func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  273. m, n := 0, 0
  274. if v.projectname != "" && v.projectname == info.projectname {
  275. m++
  276. n++
  277. }
  278. if v.buyer != "" && v.buyer == info.buyer {
  279. m++
  280. }
  281. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  282. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  283. m++
  284. }
  285. if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
  286. m++
  287. }
  288. if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
  289. m++
  290. }
  291. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  292. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  293. m++
  294. n++
  295. }
  296. if m >= 2 {
  297. if n == 2 && m == 2 {
  298. return false, reason
  299. } else {
  300. reason = reason + "满足中标B.六选二,"
  301. return true, reason
  302. }
  303. }
  304. return false, reason
  305. }
  306. //中标_C
  307. func winningRepeat_C(v *Info, info *Info) bool {
  308. if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount,info.bidamount) {
  309. //避免抽错金额-
  310. if ((v.projectcode!=""&&info.projectcode!=""&&v.projectcode==info.projectcode)||
  311. (v.contractnumber!=""&&info.contractnumber!=""&&v.contractnumber==info.contractnumber)) &&
  312. (v.winner!=""&&info.winner!=""&&v.winner==info.winner) {
  313. return false
  314. }
  315. return true
  316. }
  317. if v.winner != "" && info.winner != "" && deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) {
  318. return true
  319. }
  320. return false
  321. }
  322. //合同_A
  323. func contractRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  324. isMeet_1 := false
  325. if isMeet_1, reason = tenderRepeat_A(v, info, reason); isMeet_1 {
  326. return true, reason
  327. }
  328. isMeet_2 := false
  329. if isMeet_2, reason = winningRepeat_A(v, info, reason); isMeet_2 {
  330. return true, reason
  331. }
  332. return false, reason
  333. }
  334. //合同_B
  335. func contractRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  336. isMeet_1 := false
  337. if isMeet_1, reason = tenderRepeat_B(v, info, reason); isMeet_1 {
  338. return true, reason
  339. }
  340. isMeet_2 := false
  341. if isMeet_2, reason = winningRepeat_B(v, info, reason); isMeet_2 {
  342. return true, reason
  343. }
  344. return false, reason
  345. }
  346. //合同_C
  347. func contractRepeat_C(v *Info, info *Info) bool {
  348. if tenderRepeat_C(v, info) {
  349. return true
  350. }
  351. if winningRepeat_C(v, info) {
  352. return true
  353. }
  354. //合同类 - 新增编号
  355. if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
  356. return true
  357. }
  358. if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
  359. return true
  360. }
  361. return false
  362. }
  363. //快速低质量数据判重
  364. func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
  365. if !isTheSameDay(v.publishtime,info.publishtime) {
  366. return false,reason
  367. }
  368. //首先判定是否为低质量数据 info目标数据
  369. if info.title!=""&&(info.agency==""||v.agency=="")&&
  370. info.title==v.title&&info.projectcode==""&&info.contractnumber==""&&info.buyer=="" {
  371. isValue:=0//五要素判断
  372. if info.projectname != "" {//项目名称
  373. isValue++
  374. }
  375. if info.budget != 0 {//预算
  376. isValue++
  377. }
  378. if info.winner != ""{//中标单位
  379. isValue++
  380. }
  381. if info.bidamount != 0 {//中标金额
  382. isValue++
  383. }
  384. if isValue==0 {
  385. reason = reason + "---低质量-要素均为空-标题满足"
  386. return true, reason
  387. }else if isValue==1 {
  388. isMeet := false
  389. if isMeet, reason = judgeLowQualityData(v, info, reason); isMeet {
  390. reason = reason + "---低质量-有且一个要素组合"
  391. return true, reason
  392. }
  393. }else {
  394. }
  395. }
  396. return false,reason
  397. }
  398. //类别细节原因记录
  399. func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
  400. if info.projectname!="" && info.projectname == v.projectname{//项目名称
  401. reason = reason + "---项目名称"
  402. return true,reason
  403. }
  404. if info.budget != 0 && info.budget == v.budget{//预算
  405. reason = reason + "---预算"
  406. return true,reason
  407. }
  408. if v.winner != "" && info.winner == v.winner{//中标单位
  409. reason = reason + "---中标单位"
  410. return true,reason
  411. }
  412. if v.bidamount != 0 && info.bidamount == v.bidamount{//中标金额
  413. reason = reason + "---中标金额"
  414. return true,reason
  415. }
  416. return false,reason
  417. }