// dataMethodHeavy.go
  1. package main
  2. import (
  3. "strings"
  4. )
  5. // 判重方法1
  6. func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
  7. isMeet := false
  8. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  9. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  10. info.subtype == "变更" || info.subtype == "其他" {
  11. //招标结果
  12. if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
  13. if tenderRepeat_C(v, info) {
  14. return false, reason
  15. } else {
  16. reason = reason + "---招标类"
  17. return true, reason
  18. }
  19. } else {
  20. return false, reason
  21. }
  22. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  23. //中标结果
  24. if isMeet, reason = winningRepeat_A(v, info, reason); isMeet {
  25. if winningRepeat_C(v, info) {
  26. return false, reason
  27. } else {
  28. reason = reason + "---中标类"
  29. return true, reason
  30. }
  31. } else {
  32. return false, reason
  33. }
  34. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  35. //合同
  36. if isMeet, reason = contractRepeat_A(v, info, reason); isMeet {
  37. if contractRepeat_C(v, info) {
  38. return false, reason
  39. } else {
  40. reason = reason + "---合同类"
  41. return true, reason
  42. }
  43. } else {
  44. return false, reason
  45. }
  46. } else {
  47. //招标结果
  48. if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
  49. if tenderRepeat_C(v, info) {
  50. return false, reason
  51. } else {
  52. reason = reason + "---类别空-招标类"
  53. return true, reason
  54. }
  55. } else {
  56. return false, reason
  57. }
  58. }
  59. return false, reason
  60. }
  61. // 判重方法2
  62. func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
  63. isMeet := false
  64. isAgency := false
  65. //招标类-代理机构不同-广泛前后缀比较
  66. if v.agency != info.agency && v.agency != "" && info.agency != "" {
  67. //新增一层判断
  68. if strings.Contains(v.agency, info.agency) || strings.Contains(info.agency, v.agency) {
  69. isAgency = true
  70. } else {
  71. return false, reason
  72. }
  73. }
  74. if (v.agency == info.agency && v.agency != "" && info.agency != "") || isAgency {
  75. if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
  76. info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
  77. info.subtype == "变更" || info.subtype == "其他" {
  78. //招标结果
  79. if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
  80. if tenderRepeat_C(v, info) { //有不同
  81. return false, reason
  82. } else {
  83. reason = reason + "---招标类"
  84. return true, reason
  85. }
  86. } else {
  87. return false, reason
  88. }
  89. } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
  90. //中标结果
  91. if isMeet, reason = winningRepeat_B(v, info, reason); isMeet {
  92. if winningRepeat_C(v, info) { //有不同
  93. return false, reason
  94. } else {
  95. reason = reason + "---中标类"
  96. return true, reason
  97. }
  98. } else {
  99. return false, reason
  100. }
  101. } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  102. //合同
  103. if isMeet, reason = contractRepeat_B(v, info, reason); isMeet {
  104. if contractRepeat_C(v, info) { //有不同
  105. return false, reason
  106. } else {
  107. reason = reason + "---合同类"
  108. return true, reason
  109. }
  110. } else {
  111. return false, reason
  112. }
  113. } else {
  114. //招标结果
  115. if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
  116. if tenderRepeat_C(v, info) { //有不同
  117. return false, reason
  118. } else {
  119. reason = reason + "---类别空-招标类"
  120. return true, reason
  121. }
  122. } else {
  123. return false, reason
  124. }
  125. }
  126. }
  127. //机构最2少一个为空
  128. if v.agency == "" || info.agency == "" {
  129. var repeat = false
  130. if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
  131. reason = reason + "---机构最少一个空"
  132. return true, reason
  133. } else {
  134. return false, reason
  135. }
  136. }
  137. return false, reason
  138. }
  139. // 招标_A
  140. func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  141. var ss string
  142. p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false
  143. if v.projectname != "" && v.projectname == info.projectname {
  144. ss = ss + "p1-名称-"
  145. p1 = true
  146. }
  147. if v.buyer != "" && v.buyer == info.buyer {
  148. ss = ss + "p2-单位-"
  149. p2 = true
  150. }
  151. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  152. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  153. ss = ss + "p3-编号组-"
  154. p3 = true
  155. }
  156. if v.budget != 0 && v.budget == info.budget {
  157. ss = ss + "p4-预算-"
  158. p4 = true
  159. }
  160. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  161. ss = ss + "p9-开标时间相同-"
  162. p9 = true
  163. }
  164. if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
  165. ss = ss + "p10-开标地点-"
  166. p10 = true
  167. }
  168. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 {
  169. if strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title) {
  170. ss = ss + "p11-标题-"
  171. p11 = true
  172. } else {
  173. if v.c_title != "" && info.c_title != "" &&
  174. (strings.Contains(v.c_title, info.c_title) || strings.Contains(info.c_title, v.c_title)) {
  175. ss = ss + "p11-标题-"
  176. p11 = true
  177. }
  178. }
  179. }
  180. if info.subtype != "" && (p1 && p3 && p11) {
  181. reason = reason + "满足招标A,3要素组合-" + ss + ","
  182. return true, reason
  183. }
  184. if (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) ||
  185. (p1 && p2 && p10) || (p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) || (p1 && p3 && p4) ||
  186. (p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) ||
  187. (p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) ||
  188. (p2 && p4 && p9) || (p2 && p4 && p10) || (p2 && p4 && p11) ||
  189. (p3 && p4 && p9) || (p3 && p4 && p10) || (p3 && p4 && p11) ||
  190. (p4 && p9 && p10) || (p4 && p9 && p11) || (p9 && p10 && p11) {
  191. reason = reason + "满足招标A,3要素组合-" + ss + ","
  192. return true, reason
  193. }
  194. return false, reason
  195. }
  196. // 招标_B
  197. func tenderRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  198. m, n := 0, 0
  199. if v.projectname != "" && v.projectname == info.projectname {
  200. m++
  201. n++
  202. }
  203. if v.buyer != "" && v.buyer == info.buyer {
  204. m++
  205. }
  206. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  207. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  208. m++
  209. }
  210. if v.budget != 0 && v.budget == info.budget {
  211. m++
  212. }
  213. if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
  214. m++
  215. }
  216. //if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
  217. // m++
  218. //}
  219. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  220. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  221. m++
  222. n++
  223. }
  224. if m >= 2 {
  225. if n == 2 && m == 2 {
  226. return false, reason
  227. } else {
  228. reason = reason + "满足招标B,六选二,"
  229. return true, reason
  230. }
  231. }
  232. return false, reason
  233. }
  234. // 招标_C
  235. func tenderRepeat_C(v *Info, info *Info) bool {
  236. if v.budget != 0 && info.budget != 0 && v.budget != info.budget {
  237. return true
  238. }
  239. if v.bidopentime != 0 && info.bidopentime != 0 && isBidopentimeInterval(info.bidopentime, v.bidopentime) {
  240. return true
  241. }
  242. return false
  243. }
  244. // 中标_A
  245. func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  246. var ss string
  247. p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false
  248. if v.projectname != "" && v.projectname == info.projectname {
  249. ss = ss + "p1-项目名称-"
  250. p1 = true
  251. }
  252. if v.buyer != "" && v.buyer == info.buyer {
  253. ss = ss + "p2-单位-"
  254. p2 = true
  255. }
  256. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  257. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  258. ss = ss + "p3-编号组--"
  259. p3 = true
  260. }
  261. if v.bidamount != 0 && !isBidWinningAmount(v.bidamount, info.bidamount) {
  262. ss = ss + "p5-中标金-"
  263. p5 = true
  264. }
  265. if v.winner != "" && v.winner == info.winner {
  266. ss = ss + "p6-中标人-"
  267. p6 = true
  268. }
  269. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  270. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  271. ss = ss + "p11-标题-"
  272. p11 = true
  273. }
  274. if (p1 && p2 && p3) || (p1 && p2 && p5) || (p1 && p2 && p6) ||
  275. (p1 && p2 && p11) || (p1 && p3 && p11) ||
  276. (p1 && p3 && p5) || (p1 && p3 && p6) || (p1 && p5 && p6) ||
  277. (p2 && p3 && p5) || (p2 && p3 && p6) || (p2 && p3 && p11) ||
  278. (p2 && p5 && p6) || (p2 && p5 && p11) || (p2 && p6 && p11) ||
  279. (p3 && p5 && p6) || (p3 && p5 && p11) || (p3 && p6 && p11) ||
  280. (p5 && p6 && p11) {
  281. reason = reason + "满足中标A,3要素组合-" + ss + ","
  282. return true, reason
  283. }
  284. return false, reason
  285. }
  286. // 中标_B
  287. func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  288. m, n := 0, 0
  289. if v.projectname != "" && v.projectname == info.projectname {
  290. m++
  291. n++
  292. }
  293. if v.buyer != "" && v.buyer == info.buyer {
  294. m++
  295. }
  296. if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
  297. (v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
  298. m++
  299. }
  300. if v.bidamount != 0 && !isBidWinningAmount(v.bidamount, info.bidamount) {
  301. m++
  302. }
  303. if v.winner != "" && v.winner == info.winner {
  304. m++
  305. }
  306. if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
  307. (strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
  308. m++
  309. n++
  310. }
  311. if m >= 2 {
  312. if n == 2 && m == 2 {
  313. return false, reason
  314. } else {
  315. reason = reason + "满足中标B.六选二,"
  316. return true, reason
  317. }
  318. }
  319. return false, reason
  320. }
  321. // 中标_C
  322. func winningRepeat_C(v *Info, info *Info) bool {
  323. if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount, info.bidamount) {
  324. //避免抽错金额-
  325. if ((v.projectcode != "" && info.projectcode != "" && v.projectcode == info.projectcode) ||
  326. (v.contractnumber != "" && info.contractnumber != "" && v.contractnumber == info.contractnumber)) &&
  327. (v.winner != "" && info.winner != "" && v.winner == info.winner) {
  328. return false
  329. }
  330. return true
  331. }
  332. if v.winner != "" && info.winner != "" && v.winner != info.winner {
  333. return true
  334. }
  335. return false
  336. }
  337. // 合同_A
  338. func contractRepeat_A(v *Info, info *Info, reason string) (bool, string) {
  339. isMeet_1 := false
  340. if isMeet_1, reason = tenderRepeat_A(v, info, reason); isMeet_1 {
  341. return true, reason
  342. }
  343. isMeet_2 := false
  344. if isMeet_2, reason = winningRepeat_A(v, info, reason); isMeet_2 {
  345. return true, reason
  346. }
  347. return false, reason
  348. }
  349. // 合同_B
  350. func contractRepeat_B(v *Info, info *Info, reason string) (bool, string) {
  351. isMeet_1 := false
  352. if isMeet_1, reason = tenderRepeat_B(v, info, reason); isMeet_1 {
  353. return true, reason
  354. }
  355. isMeet_2 := false
  356. if isMeet_2, reason = winningRepeat_B(v, info, reason); isMeet_2 {
  357. return true, reason
  358. }
  359. return false, reason
  360. }
  361. // 合同_C
  362. func contractRepeat_C(v *Info, info *Info) bool {
  363. if tenderRepeat_C(v, info) {
  364. return true
  365. }
  366. if winningRepeat_C(v, info) {
  367. return true
  368. }
  369. //合同类 - 新增编号
  370. if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
  371. return true
  372. }
  373. if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
  374. return true
  375. }
  376. return false
  377. }
  // isTheSimilarName reports whether two names are "similar", meaning that one
  // of them contains the other as a substring (equal names included).
  //
  // An empty string is a substring of every string, so the result is true
  // whenever either argument is empty. Callers that must not treat a missing
  // name as a match have to check for emptiness themselves.
  func isTheSimilarName(name1 string, name2 string) bool {
  	return strings.Contains(name1, name2) || strings.Contains(name2, name1)
  }
  385. // 快速低质量数据判重
  386. func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
  387. //区间间隔24小时
  388. if !isTimeIntervalPeriod(v.publishtime, info.publishtime) {
  389. return false, reason
  390. }
  391. //首先判定是否为低质量数据 info目标数据
  392. if info.title != "" && (info.agency == "" || v.agency == "") &&
  393. (info.title == v.title) &&
  394. (info.projectcode == "" || info.projectcode == v.projectcode) &&
  395. info.contractnumber == "" && info.buyer == "" {
  396. isValue := 0 //五要素判断
  397. if info.projectname != "" { //项目名称
  398. isValue++
  399. }
  400. if info.budget != 0 { //预算
  401. isValue++
  402. }
  403. if info.winner != "" { //中标单位
  404. isValue++
  405. }
  406. if info.bidamount != 0 { //中标金额
  407. isValue++
  408. }
  409. if isValue == 0 {
  410. reason = reason + "---低质量-要素均为空-标题满足"
  411. return true, reason
  412. } else if isValue == 1 {
  413. isMeet := false
  414. if isMeet, reason = judgeLowQualityData(v, info, reason); isMeet {
  415. reason = reason + "---低质量-有且一个要素组合"
  416. return true, reason
  417. }
  418. } else if isValue == 2 {
  419. if info.subtype == "采购意向" { //特殊
  420. if info.projectname != "" && info.projectname == v.projectname &&
  421. info.budget != 0 && info.budget == v.budget &&
  422. info.city != "" && info.city == v.city {
  423. reason = reason + "---采购意向~同城~预算~名称均一致"
  424. return true, reason
  425. }
  426. }
  427. } else {
  428. }
  429. }
  430. return false, reason
  431. }
  432. // 类别细节原因记录
  433. func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
  434. if info.projectname != "" && isTheSimilarName(info.projectname, v.projectname) {
  435. reason = reason + "---项目名称"
  436. return true, reason
  437. }
  438. if info.budget != 0 && info.budget == v.budget { //预算
  439. reason = reason + "---预算"
  440. return true, reason
  441. }
  442. if v.winner != "" && info.winner == v.winner { //中标单位
  443. reason = reason + "---中标单位"
  444. return true, reason
  445. }
  446. if v.bidamount != 0 && info.bidamount == v.bidamount { //中标金额
  447. reason = reason + "---中标金额"
  448. return true, reason
  449. }
  450. return false, reason
  451. }