|
@@ -47,26 +47,26 @@ var (
|
|
|
regTrimSpace = regexp.MustCompile("^[\u3000\u2003\u00a0\\s]+|[\u3000\u2003\u00a0\\s]+$")
|
|
|
regReplWrapSpace = regexp.MustCompile("^[\r\n][\u3000\u2003\u00a0\\s]*|[\r\n][\u3000\u2003\u00a0\\s]*$")
|
|
|
regReplAllSymbol = regexp.MustCompile("[(\\(<《【\\[{{〔)\\)>》】\\]}}〕,,;;::'\"“”。.\\??/+=\\-_——*&……\\^%$¥@#!!`~·]")
|
|
|
- regFilterTitle = regexp.MustCompile("[(\\(<《【\\[{{〔].+?[)\\)>》】\\]}}〕]")
|
|
|
- regDivision = regexp.MustCompile("[::]")
|
|
|
- regSpliteSegment = regexp.MustCompile("[\r\n]")
|
|
|
- regFilterNumber = regexp.MustCompile("^[\\d一二三四五六七八九十]+")
|
|
|
- regSplit = regexp.MustCompile("或|和|以?及|与|、|或")
|
|
|
- regStartWrap = regexp.MustCompile("^[\r\n]")
|
|
|
- regEndWrap = regexp.MustCompile("[\r\n]$")
|
|
|
- regMoreWrap = regexp.MustCompile("[\r\n]{2,}")
|
|
|
- regStrWrap = regexp.MustCompile("分包名称[::]")
|
|
|
- regBZJWarap = regexp.MustCompile("保证金.*")
|
|
|
- regPDFWarap = regexp.MustCompile("[a-zA-Z](包|标段).(pdf|PDF)")
|
|
|
- regAZWarap = regexp.MustCompile("(标[a-zA-Z]取值|标段划分)")
|
|
|
- replSerial = regexp.MustCompile("(\r\n|^)([\\d一二三四五六七八九十][、..::,])+\\d")
|
|
|
- moreColonReg = regexp.MustCompile("[::]+")
|
|
|
- regFilter = regexp.MustCompile("等$")
|
|
|
- pkgFilter = regexp.MustCompile("第[一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ](子|合同|分|施工|监理)?(标段?|包|合同段|标包)|(子|合同|分|施工|监理)?(标|包)(段|号)?")
|
|
|
- indexTile = regexp.MustCompile("[0-9.]{2,3}[\\s\u4e00-\u9fa5]{2,8}[::]+") //小标题
|
|
|
- indexTile2 = regexp.MustCompile("[\\s\u4e00-\u9fa5]{2,8}")
|
|
|
- regReplAllSpace2 = regexp.MustCompile("[\u3000\u2003\u00a0\\s0-9.::、\\(\\)]+")
|
|
|
- confusion = map[string]string{
|
|
|
+ regFilterTitle = regexp.MustCompile("[(\\(<《【\\[{{〔].+?[)\\)>》】\\]}}〕]")
|
|
|
+ regDivision = regexp.MustCompile("[::]")
|
|
|
+ regSpliteSegment = regexp.MustCompile("[\r\n]")
|
|
|
+ regFilterNumber = regexp.MustCompile("^[\\d一二三四五六七八九十]+")
|
|
|
+ regSplit = regexp.MustCompile("或|和|以?及|与|、|或")
|
|
|
+ regStartWrap = regexp.MustCompile("^[\r\n]")
|
|
|
+ regEndWrap = regexp.MustCompile("[\r\n]$")
|
|
|
+ regMoreWrap = regexp.MustCompile("[\r\n]{2,}")
|
|
|
+ regStrWrap = regexp.MustCompile("分包名称[::]")
|
|
|
+ regBZJWarap = regexp.MustCompile("(保证金.*|每包[0-9]*元|标志|一包一投|上包|标线|国标|第[\\d一二三四五六七八九十]标室)")
|
|
|
+ regFJWarap = regexp.MustCompile("[a-zA-Z0-9](包|标段)[公告附件]*.(pdf|PDF|docx|doc|DOCX|DOC)")
|
|
|
+ regAZWarap = regexp.MustCompile("(标[a-zA-Z]取值|标段划分|标液|分包个数|物资[\\d一二三四五六七八九十]?包)")
|
|
|
+ replSerial = regexp.MustCompile("(\r\n|^)([\\d一二三四五六七八九十][、..::,])+\\d")
|
|
|
+ moreColonReg = regexp.MustCompile("[::]+")
|
|
|
+ regFilter = regexp.MustCompile("等$")
|
|
|
+ pkgFilter = regexp.MustCompile("第[一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ](子|合同|分|施工|监理)?(标段?|包|合同段|标包)|(子|合同|分|施工|监理)?(标|包)(段|号)?")
|
|
|
+ indexTile = regexp.MustCompile("[0-9.]{2,3}[\\s\u4e00-\u9fa5]{2,8}[::]+") //小标题
|
|
|
+ indexTile2 = regexp.MustCompile("[\\s\u4e00-\u9fa5]{2,8}")
|
|
|
+ regReplAllSpace2 = regexp.MustCompile("[\u3000\u2003\u00a0\\s0-9.::、\\(\\)]+")
|
|
|
+ confusion = map[string]string{
|
|
|
"参与": "canyu",
|
|
|
}
|
|
|
//查找分包之前,先对内容进行预处理
|
|
@@ -653,7 +653,7 @@ func FindPackageFromText(title string, content string, isSite bool, codeSite str
|
|
|
//分块之后分包
|
|
|
func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content, title string, isFindWinnerOrder, accuracy bool, isSite bool, codeSite string) (bool, string) {
|
|
|
//查找是否有分包
|
|
|
- content = regPDFWarap.ReplaceAllString(content, "\n")
|
|
|
+ content = regFJWarap.ReplaceAllString(content, "\n")
|
|
|
content = regAZWarap.ReplaceAllString(content, "\n")
|
|
|
content = regStrWrap.ReplaceAllString(content, "\n")
|
|
|
content = regMoreWrap.ReplaceAllString(content, "\n")
|
|
@@ -672,7 +672,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
|
|
|
if len(pkg) == 1 && strings.HasSuffix(con, v[0]) {
|
|
|
return false, ""
|
|
|
}
|
|
|
- is := regexp.MustCompile(v[0] + "[::]*").FindAllStringIndex(con, -1)
|
|
|
+ is := regexp.MustCompile(v[0]+"[::]*").FindAllStringIndex(con, -1)
|
|
|
for _, sv := range is {
|
|
|
appendWarpIndex = append(appendWarpIndex, sv[0])
|
|
|
}
|
|
@@ -716,13 +716,13 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
|
|
|
indexPkgMap[sv[0]] = v[0]
|
|
|
}
|
|
|
//key在包前面,并且在一行的开头
|
|
|
- keys := regexp.MustCompile("([\r\n]|^)([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::\\s\u3000\u2003\u00a0]+.*?)" + pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
+ keys := regexp.MustCompile("([\r\n]|^)([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::\\s\u3000\u2003\u00a0]+.*?)"+pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
if len(keys) == 0 {
|
|
|
//key在包前面,并且key以冒号结尾
|
|
|
- keys = regexp.MustCompile("()([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::]+[\\s\u3000\u2003\u00a0]*[\r\n])" + pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
+ keys = regexp.MustCompile("()([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::]+[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
}
|
|
|
if len(keys) == 0 {
|
|
|
- keys = regexp.MustCompile("()注[::]([\u4e00-\u9fa5]{2,8}?([((].{1,8}?[))])?[\\s\u3000\u2003\u00a0]*[\r\n])" + pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
+ keys = regexp.MustCompile("()注[::]([\u4e00-\u9fa5]{2,8}?([((].{1,8}?[))])?[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
}
|
|
|
for _, key := range keys {
|
|
|
startEndMap[key[5]] = key[4]
|
|
@@ -776,9 +776,10 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
|
|
|
}
|
|
|
index := util.PackageNumberConvert(bk)
|
|
|
//去掉前缀,空格必须要加,分kv的时候要用
|
|
|
- text = regexp.MustCompile(bv[0] + "[::]*").ReplaceAllString(text, "")
|
|
|
- if strings.TrimLeft(tmptext, bv[0]) == text || strings.TrimLeft(tmptext, bv[0]+":") == text {
|
|
|
+ text = regexp.MustCompile(bv[0]+"[::]*").ReplaceAllString(text, "")
|
|
|
+ if strings.TrimLeft(tmptext, bv[0]) == text || strings.TrimLeft(tmptext, bv[0]+":") == text || strings.TrimLeft(tmptext, bv[0]+":") == text {
|
|
|
var tagtitle string
|
|
|
+
|
|
|
for i, v := range titleindexs {
|
|
|
if i == 0 {
|
|
|
continue
|
|
@@ -789,6 +790,11 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
|
|
|
}
|
|
|
}
|
|
|
tagtitle = regReplAllSpace2.ReplaceAllString(tagtitle, "")
|
|
|
+ if tagtitle == "" {
|
|
|
+ tagtitle = title
|
|
|
+ } else if strings.Contains(tagtitle, bv[0]) && title != "" {
|
|
|
+ tagtitle = title
|
|
|
+ }
|
|
|
text = tagtitle + ":" + text
|
|
|
}
|
|
|
headKey := ""
|