// c_pcode.go
  1. package clean
  2. import (
  3. "regexp"
  4. "strings"
  5. "unicode"
  6. "unicode/utf8"
  7. )
  8. // 限制
  9. var codeUnConReg *regexp.Regexp = regexp.MustCompile("(null|勘察|包件|测试|设计|设备|项目|标段|工程|监理|范围|分包|月|日|天)")
  10. var codeUnLenReg *regexp.Regexp = regexp.MustCompile("([\u4e00-\u9fa5]{7,})")
  11. // 符合
  12. var codeMeetReg1 *regexp.Regexp = regexp.MustCompile("(第[0-9]+号)")
  13. // 清洗项目编号
  14. func CleanPcode(pcode string, fns []string) string {
  15. if utf8.RuneCountInString(pcode) < 5 {
  16. return ""
  17. }
  18. pcode = fieldReg1.ReplaceAllString(pcode, "")
  19. pcode = pcodeReg1.ReplaceAllString(pcode, "")
  20. pcode = pcodeReg2.ReplaceAllString(pcode, "")
  21. //符合条件
  22. if codeMeetReg1.MatchString(pcode) {
  23. return pcode
  24. }
  25. //舍弃条件
  26. if codeUnConReg.MatchString(pcode) || codeUnLenReg.MatchString(pcode) || !isAlphanumeric(pcode) || isRegTimeDateCode(pcode) {
  27. return ""
  28. }
  29. //校验与附件名字否是一致-舍弃
  30. for _, v := range fns {
  31. if utf8.RuneCountInString(v) >= utf8.RuneCountInString(pcode) {
  32. if strings.Contains(v, pcode) {
  33. return ""
  34. }
  35. }
  36. }
  37. return pcode
  38. }
  39. // 清洗其他编号
  40. func CleanOtherCode(ocode string) string {
  41. if utf8.RuneCountInString(ocode) < 5 {
  42. return ""
  43. }
  44. ocode = fieldReg1.ReplaceAllString(ocode, "")
  45. ocode = pcodeReg1.ReplaceAllString(ocode, "")
  46. ocode = pcodeReg2.ReplaceAllString(ocode, "")
  47. //符合条件
  48. if codeMeetReg1.MatchString(ocode) {
  49. return ocode
  50. }
  51. //舍弃条件
  52. if codeUnConReg.MatchString(ocode) || codeUnLenReg.MatchString(ocode) || !isAlphanumeric(ocode) || isRegTimeDateCode(ocode) {
  53. return ""
  54. }
  55. return ocode
  56. }
  57. // 是否含字母数字
  58. func isAlphanumeric(str string) bool {
  59. var count int
  60. for _, v := range str {
  61. if unicode.IsNumber(v) || unicode.IsLetter(v) {
  62. count++
  63. break
  64. }
  65. }
  66. return count > 0
  67. }
  68. // 连续数字
  69. func isRegTimeDateCode(str string) bool {
  70. reg := `\d{8}`
  71. regx, _ := regexp.Compile(reg)
  72. if regx.FindString(str) != "" {
  73. return false
  74. }
  75. if utf8.RuneCountInString(str) == 8 {
  76. return true
  77. }
  78. return false
  79. }