multipackage.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. package pretreated
  2. import (
  3. "regexp"
  4. "sort"
  5. "strings"
  6. )
  7. var (
  8. /**
  9. 监理 施工没有处理
  10. **/
  11. //替换容易混淆的词
  12. PreReg = regexp.MustCompile("(同|每|对|[^其]中|仅|任意)[一二三四五六七八九十\\d]个?(子|合同|分|施工|监理)?(标段?|包)|项目标号|文件A包|涉及包号|包件号|0\\s?个标段|1\\-[\\d]标段|子包(\\d、)+\\d|\\d\\.\\d(标段|包)[^一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]|[1-9]标。")
  13. PreReg1 = regexp.MustCompile("[^\n]([A-Z]?([一二三四五六七八九十]|\\d)、)+[A-Z]?([一二三四五六七八九十]|\\d)(标段?|包)")
  14. //有分包划分情况的直接对比是1的肯定不是分包
  15. PreCheckMulti = regexp.MustCompile("[^第]([一二三四五六七八九十两0-9ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)[  \u3000\u2003\u00a0]*个?((子|合同|分|施工|监理)?(标段?|包|合同段|标包))进行([一二三四五六七八九十两0-9ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)|(划分|分[设为成]?|共[分设有计]?)[::]?[  \u3000\u2003\u00a0]*([一二三四五六七八九十两0-9ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)[  \u3000\u2003\u00a0]*个?((子|合同|分|施工|监理)?(标段?|包|合同段|标包|项目))")
  16. //替换容易混淆的词
  17. PreCon = regexp.MustCompile("([\r\n]|^)[\u3000\u2003\u00a0\\s]*(^标项)(\\d\\.)+\\d|[一二三四五1-9、.]+[  \u3000\u2003\u00a0]*((标段|分包)(划分|情况)|(标书))|([上下]一[条页篇][::]?[^,,。\\n]{0,120}|备注[::][^\\n]{0,120}|业绩[::][^\\n,。,]{0,80}|三包(手册|服务|政策|凭证|期|标准|规定|责任|要求|售后)|(要求|提供|质量|国家|享受|负责|实行|执行|承诺|门前|法定|规定).{0,6}三包|“三包”|\\d+万?([个套只支分名][^标包])|[?]|[((]请?注意[::][^((]+[))])")
  18. PreCon2 = regexp.MustCompile("[评中开定]\\s?标\\s?[0-9一二三四五六七八九十]+|标[准尺高书注]|[^中]标价|[开鼠投招军指企目]标|包[括含装为内]|[承树]包|CA证书|地点[::].*标|.{30,}合同段")
  19. PreCon3 = regexp.MustCompile("(标段[一二三四五六七八九十0-9A-Za-z])[((].*[))][::]")
  20. //提取分包标识
  21. MultiReg = regexp.MustCompile("(([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-])+(包|标段|分标))[::]?|(?:^|\\n)([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+(包))|([第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)#?((子|合同|分|施工|监理)?(标段?|合同段|标包)))|(((子|合同|施工|监理|标包|标|包)(标|包段|项|段|组)?)[     ]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+))|((项目|包件)([一二三四五六七八九十1-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+))|((包组|包件)[::\\s]+([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+))|((施工|监理)(标段))[::\n]")
  22. PreCon4 = regexp.MustCompile("([一二三四五六七八九十]标段[::¥0-9.]*(万元)?)[、]?")
  23. Precon4dw = regexp.MustCompile("(万元|元)")
  24. //匹配到的包格式分类统计
  25. keyregs = []map[*regexp.Regexp]int{
  26. map[*regexp.Regexp]int{
  27. regexp.MustCompile("^[一二三四五六七八九十]+$"): 8,
  28. },
  29. map[*regexp.Regexp]int{
  30. regexp.MustCompile("^[0-9]+$"): 7,
  31. },
  32. map[*regexp.Regexp]int{
  33. regexp.MustCompile("^[A-Za-z]+[0-9]*$"): 6,
  34. },
  35. }
  36. //冒号处理优先级高 如标段一:
  37. MH = regexp.MustCompile("[::]")
  38. //匹配包有的时候类似 包2LN2的只保留前面数字
  39. ignoreReg = regexp.MustCompile("^(\\d+)[A-Za-z].*")
  40. //标题中含分包特征的标段一、二、三或包1、包2之类,根据
  41. TitleReg = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ、\\-~至]+(子|合同|分|施工|监理|标)?[包标段][号段]?[、]?)+|((子|合同|分|施工|监理|标)?[包标段][号段]?[一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ、\\-~至]+[、]?)+|(子|合同|分|施工|监理|标)?[包标段][号段]?[a-zA-Z0-9]+[\\-~-至、](子|合同|分|施工|监理|标)?[包标段][号段]?[a-zA-Z0-9]+")
  42. clearPkgFlag = regexp.MustCompile("^[\\-]+|[\\-]+$")
  43. //无效冗余包名
  44. cleanPkgName = regexp.MustCompile("^(1[-][23456789]包|一[-][二三四五六七八九]包|(施工|监理)BIM)$")
  45. cleanPkgCon = regexp.MustCompile("(不足三家.*包采购.*终止)")
  46. )
  47. //判断分包
  48. func CheckMultiPackage(con string) (content string, m map[string][]string, b bool) {
  49. m = map[string][]string{}
  50. con = PreReg.ReplaceAllString(con, "")
  51. con = PreReg1.ReplaceAllString(con, "")
  52. content = con
  53. con = PreCheckMulti.ReplaceAllString(con, "")
  54. con = PreCon.ReplaceAllString(con, "\n")
  55. con = PreCon2.ReplaceAllString(con, "")
  56. con = PreCon3.ReplaceAllString(con, "${1}:")
  57. //替换敏感词-分割
  58. con = replaceSenstiveReg1.ReplaceAllString(con, "$1\n$2")
  59. //修改 \nX标段
  60. res := MultiReg.FindAllStringSubmatch(con, -1)
  61. if len(res) > 0 {
  62. //1-3 45 6-10 11-15 16-18 19-21 22-24
  63. mindex := map[string]int{}
  64. for index, v := range res {
  65. k := v[1]
  66. vindex := 2
  67. if k == "" {
  68. k = v[4]
  69. vindex = 5
  70. }
  71. if k == "" {
  72. k = v[6]
  73. vindex = 7
  74. }
  75. if k == "" {
  76. k = v[11]
  77. vindex = 12
  78. }
  79. if k == "" {
  80. k = v[16]
  81. vindex = 17
  82. }
  83. if k == "" {
  84. k = v[19]
  85. vindex = 20
  86. }
  87. if k == "" {
  88. k = v[22]
  89. vindex = 23
  90. }
  91. if k != "" && v[vindex] != "" && vindex != 5 {
  92. vindex += 1
  93. }
  94. if len(m[k]) == 0 && k != "" && v[vindex] != "" {
  95. k = ignoreReg.ReplaceAllString(k, "$1")
  96. k = clearPkgFlag.ReplaceAllString(k, "")
  97. m[k] = []string{clearPkgFlag.ReplaceAllString(strings.TrimSpace(v[0]), ""), v[vindex]}
  98. mindex[k] = index
  99. }
  100. }
  101. //过滤无效包
  102. new_m := map[string][]string{}
  103. for k, v := range m {
  104. if cleanPkgName.MatchString(k) {
  105. continue
  106. }
  107. new_m[k] = v
  108. }
  109. m = new_m
  110. if len(m) > 1 {
  111. //对k优先级进行处理过滤
  112. SEL := -1 //确定以哪种类型为标段标识,没有去判断v相同不相同,存在一定的误判!如 1:合同包1 1:包
  113. mapclassstr := map[int]map[string][]string{}
  114. mapclass := map[int]int{}
  115. for k, v := range m {
  116. str := res[mindex[k]][0]
  117. mk := 5
  118. for _, keyreg := range keyregs {
  119. for reg, pos := range keyreg {
  120. if reg.MatchString(k) {
  121. mk = pos
  122. break
  123. }
  124. }
  125. }
  126. mapclass[mk]++
  127. if mapclassstr[mk] == nil {
  128. mapclassstr[mk] = map[string][]string{}
  129. }
  130. mapclassstr[mk][k] = []string{v[0], v[1]}
  131. if MH.MatchString(str) { //如果有冒号直接确定
  132. SEL = mk
  133. }
  134. }
  135. //log.Println(mapclassstr, mapclass, SEL)
  136. if SEL > 0 {
  137. m = mapclassstr[SEL]
  138. } else {
  139. //比较出哪个最多,倒排,如果都一样,按 9 8 7 6 5来处理
  140. max := 0
  141. maxk := []int{}
  142. for k, v := range mapclass {
  143. if v > max {
  144. max = v
  145. maxk = []int{}
  146. maxk = append(maxk, k)
  147. } else if v == max {
  148. maxk = append(maxk, k)
  149. }
  150. }
  151. if len(maxk) > 0 {
  152. sort.Ints(maxk)
  153. m = mapclassstr[maxk[len(maxk)-1]]
  154. }
  155. }
  156. }
  157. if len(m) > 0 {
  158. b = true
  159. }
  160. //log.Println(m, res)
  161. }
  162. return
  163. }