dataMethod.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. package main
  2. import (
  3. "math"
  4. qutil "qfw/util"
  5. "regexp"
  6. "strings"
  7. )
  8. //完善判重数据检测-前置条件
  9. func convertArabicNumeralsAndLetters(data string) string {
  10. newData :=data
  11. res1, _ := regexp.Compile("[a-zA-Z]+");
  12. if res1.MatchString(data) {
  13. newData = res1.ReplaceAllStringFunc(data, strings.ToUpper);
  14. }
  15. res2, _ := regexp.Compile("[0-9]+");
  16. if res2.MatchString(newData) {
  17. arr1:=[]string {"0","1","2","3","4","5","6","7","8","9"}
  18. arr2:=[]string {"零","一","二","三","四","五","六","七","八","九"}
  19. for i:=0 ;i<len(arr1) ;i++ {
  20. resTemp ,_:=regexp.Compile(arr1[i])
  21. newData= resTemp.ReplaceAllString(newData, arr2[i]);
  22. }
  23. }
  24. return newData
  25. }
  26. func dealWithSpecialPhrases(str1 string,str2 string) (string,string) {
  27. newStr1:=str1
  28. newStr2:=str2
  29. res, _ := regexp.Compile("重新招标");
  30. if res.MatchString(newStr1) {
  31. newStr1 = res.ReplaceAllString(newStr1,"重招");
  32. }
  33. if res.MatchString(newStr2) {
  34. newStr2 = res.ReplaceAllString(newStr2,"重招");
  35. }
  36. return newStr1,newStr2
  37. }
  38. //关键词数量v
  39. func dealWithSpecialWordNumber(info*Info,v*Info) int {
  40. okNum:=0
  41. if info.titleSpecialWord || info.specialWord {
  42. okNum++
  43. }
  44. if v.titleSpecialWord || v.specialWord {
  45. okNum++
  46. }
  47. return okNum
  48. }
  49. //关键词再次判断
  50. func againRepeat(v *Info, info *Info ,site bool) bool {
  51. if isPublishtimeInterval(info.publishtime,v.publishtime) && site {
  52. return true
  53. }
  54. if isBidopentimeInterval(info.bidopentime,v.bidopentime) {
  55. return true
  56. }
  57. if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
  58. return true
  59. }
  60. if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{
  61. return true
  62. }
  63. if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" {
  64. return true
  65. }
  66. if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
  67. return true
  68. }
  69. if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
  70. return true
  71. }
  72. if v.title != info.title && v.title != "" && info.title != ""{
  73. if v.projectname != info.projectname && v.projectname != "" && info.projectname != ""{
  74. return true
  75. }
  76. }
  77. if v.projectname != info.projectname && v.projectname != "" && info.projectname != ""{
  78. return true
  79. }
  80. return false
  81. }
  82. //均含有关键词再次判断
  83. func againContainSpecialWord (v *Info, info *Info) bool {
  84. if isBidopentimeInterval(info.bidopentime,v.bidopentime) {
  85. return true
  86. }
  87. if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
  88. return true
  89. }
  90. if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{
  91. return true
  92. }
  93. if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" {
  94. return true
  95. }
  96. if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
  97. return true
  98. }
  99. if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
  100. return true
  101. }
  102. //提取标题-标段号处理
  103. if dealTitleSpecial(v.title,info.title) {
  104. return true
  105. }
  106. return false
  107. }
  108. //提取标题-标段号处理
  109. func dealTitleSpecial(title1 string,title2 string) bool{
  110. regular1 := "(包|标段|标包)[((]?[0-9a-zA-Z一二三四五六七八九十零123456789]+[))]?"
  111. regular2 := "[0-9a-zA-Z一二三四五六七八九十零123456789]+(包|标段|标包)"
  112. regx1_1,_ := regexp.Compile(regular1)
  113. str1:=regx1_1.FindString(title1)
  114. if str1!="" {
  115. //log.Println("标题1,规则一提取:",str1)
  116. }else {
  117. regx1_2,_ := regexp.Compile(regular2)
  118. str1=regx1_2.FindString(title1)
  119. if str1!="" {
  120. //log.Println("标题1,规则二提取:",str1)
  121. }
  122. }
  123. regx2_1,_ := regexp.Compile(regular1)
  124. str2:=regx2_1.FindString(title2)
  125. if str2!="" {
  126. //log.Println("标题2,规则一提取:",str2)
  127. }else {
  128. regx2_2,_ := regexp.Compile(regular2)
  129. str2=regx2_2.FindString(title2)
  130. if str2!="" {
  131. //log.Println("标题2,规则二提取:",str2)
  132. }
  133. }
  134. //根据提取的结果,在进行清洗
  135. if str1!="" {
  136. str1 = deleteExtraSpace(str1)
  137. str1= strings.Replace(str1, "(", "", -1)
  138. str1= strings.Replace(str1, "(", "", -1)
  139. str1= strings.Replace(str1, ")", "", -1)
  140. str1= strings.Replace(str1, ")", "", -1)
  141. str1 = convertArabicNumeralsAndLetters(str1)
  142. }
  143. if str2!="" {
  144. str2 = deleteExtraSpace(str2)
  145. str2= strings.Replace(str2, "(", "", -1)
  146. str2= strings.Replace(str2, "(", "", -1)
  147. str2= strings.Replace(str2, ")", "", -1)
  148. str2= strings.Replace(str2, ")", "", -1)
  149. str2 = convertArabicNumeralsAndLetters(str2)
  150. }
  151. //log.Println("最终:",str1,str2)
  152. if str1!=str2 {
  153. //log.Println("不一致")
  154. return true
  155. }else {
  156. //log.Println("一致")
  157. return false
  158. }
  159. }
  160. //删除中标单位字符串中多余的空格(含tab)
  161. func deleteExtraSpace(s string) string {
  162. //删除字符串中的多余空格,有多个空格时,仅保留一个空格
  163. s1 := strings.Replace(s, " ", " ", -1) //替换tab为空格
  164. regstr := "\\s{2,}" //两个及两个以上空格的正则表达式
  165. reg, _ := regexp.Compile(regstr) //编译正则表达式
  166. s2 := make([]byte, len(s1)) //定义字符数组切片
  167. copy(s2, s1) //将字符串复制到切片
  168. spc_index := reg.FindStringIndex(string(s2)) //在字符串中搜索
  169. for len(spc_index) > 0 { //找到适配项
  170. s2 = append(s2[:spc_index[0]+1], s2[spc_index[1]:]...) //删除多余空格
  171. spc_index = reg.FindStringIndex(string(s2)) //继续在字符串中搜索
  172. }
  173. return string(s2)
  174. }
  175. //中标金额倍率:10000
  176. func isBidWinningAmount(f1 float64 ,f2 float64) bool {
  177. if f1==f2||f1*10000==f2||f2*10000==f1 {
  178. return false
  179. }
  180. return true
  181. }
  182. //时间间隔周期
  183. func isTimeIntervalPeriod(i1 int64 ,i2 int64) bool {
  184. if math.Abs(float64(i1-i2)) < 172800.0 {
  185. return true
  186. }else {
  187. return false //大于48小时
  188. }
  189. }
  190. //开标时间区间为一天
  191. func isBidopentimeInterval(i1 int64 ,i2 int64) bool {
  192. if i1==0||i2==0 {
  193. return false
  194. }
  195. //不在同一天-或者同一天间隔超过六小时,属于不相等返回true
  196. timeOne,timeTwo:=i1,i2
  197. day1 := qutil.FormatDateByInt64(&timeOne, qutil.Date_yyyyMMdd)
  198. day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
  199. if day1==day2 {
  200. //是否间隔超过十二小时
  201. if math.Abs(float64(i1-i2)) >43200.0 {
  202. return true
  203. }else {
  204. return false
  205. }
  206. }else {
  207. return true
  208. }
  209. }
  210. //发布时间区间为一天
  211. func isPublishtimeInterval(i1 int64 ,i2 int64) bool {
  212. if i1==0||i2==0 {
  213. return false
  214. }
  215. //不在同一天-或者同一天间隔超过12小时,属于不相等返回true
  216. timeOne,timeTwo:=i1,i2
  217. day1 := qutil.FormatDateByInt64(&timeOne, qutil.Date_yyyyMMdd)
  218. day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
  219. if day1==day2 {
  220. //是否间隔超过十二小时
  221. if math.Abs(float64(i1-i2)) >=43200.0 {
  222. return true
  223. }else {
  224. return false
  225. }
  226. }else {
  227. return true
  228. }
  229. }
  230. //开标时间区间为一天
  231. func isTheSameDay(i1 int64 ,i2 int64) bool {
  232. if i1==0||i2==0 {
  233. return false
  234. }
  235. timeOne,timeTwo:=i1,i2
  236. day1 := qutil.FormatDateByInt64(&timeOne, qutil.Date_yyyyMMdd)
  237. day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
  238. if day1==day2 {
  239. return true
  240. }
  241. //if math.Abs(float64(i1-i2)) <=86400.0 {
  242. // return true
  243. //}
  244. return false
  245. }
  246. //前置0 五要素均相等认为重复
  247. func leadingElementSame(v *Info, info *Info) bool {
  248. isok:= 0
  249. if info.projectname != "" && v.projectname == info.projectname {
  250. isok++
  251. }
  252. if info.buyer != "" && v.buyer == info.buyer {
  253. isok++
  254. }
  255. if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
  256. if info.contractnumber != "" && v.contractnumber == info.contractnumber {
  257. isok++
  258. }
  259. }else {
  260. if info.projectcode != "" && v.projectcode == info.projectcode {
  261. isok++
  262. }
  263. }
  264. if info.title != "" && v.title == info.title {
  265. isok++
  266. }
  267. if v.agency == info.agency &&info.agency != "" {
  268. isok++
  269. }
  270. if v.winner == info.winner&&info.winner != "" {
  271. isok++
  272. }
  273. if isok>=5 {
  274. return true
  275. }
  276. return false
  277. }
  278. //buyer的优先级
  279. func buyerIsContinue(v *Info, info *Info) bool {
  280. if !isTheSameDay(info.publishtime,v.publishtime) {
  281. return true
  282. }
  283. if v.title != info.title && v.title != "" && info.title != ""{
  284. if v.projectname != info.projectname && v.projectname != "" && info.projectname != ""{
  285. return true
  286. }
  287. }
  288. if v.projectname != info.projectname && v.projectname != "" && info.projectname != ""{
  289. return true
  290. }
  291. //if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
  292. // return true
  293. //}
  294. //if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{
  295. // return true
  296. //}
  297. //if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" {
  298. // return true
  299. //}
  300. if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
  301. return true
  302. }
  303. if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
  304. return true
  305. }
  306. return false
  307. }
  308. //无效数据
  309. func invalidData(d1 string, d2 string, d3 string, d4 string) bool {
  310. var n int
  311. if d1 != "" {
  312. n++
  313. }
  314. if d2 != "" {
  315. n++
  316. }
  317. if d3 != "" {
  318. n++
  319. }
  320. if d4 != "" {
  321. n++
  322. }
  323. if n == 0 {
  324. return true
  325. }
  326. return false
  327. }