|
@@ -47,26 +47,26 @@ var (
|
|
regTrimSpace = regexp.MustCompile("^[\u3000\u2003\u00a0\\s]+|[\u3000\u2003\u00a0\\s]+$")
|
|
regTrimSpace = regexp.MustCompile("^[\u3000\u2003\u00a0\\s]+|[\u3000\u2003\u00a0\\s]+$")
|
|
regReplWrapSpace = regexp.MustCompile("^[\r\n][\u3000\u2003\u00a0\\s]*|[\r\n][\u3000\u2003\u00a0\\s]*$")
|
|
regReplWrapSpace = regexp.MustCompile("^[\r\n][\u3000\u2003\u00a0\\s]*|[\r\n][\u3000\u2003\u00a0\\s]*$")
|
|
regReplAllSymbol = regexp.MustCompile("[(\\(<《【\\[{{〔)\\)>》】\\]}}〕,,;;::'\"“”。.\\??/+=\\-_——*&……\\^%$¥@#!!`~·]")
|
|
regReplAllSymbol = regexp.MustCompile("[(\\(<《【\\[{{〔)\\)>》】\\]}}〕,,;;::'\"“”。.\\??/+=\\-_——*&……\\^%$¥@#!!`~·]")
|
|
- regFilterTitle = regexp.MustCompile("[(\\(<《【\\[{{〔].+?[)\\)>》】\\]}}〕]")
|
|
|
|
- regDivision = regexp.MustCompile("[::]")
|
|
|
|
- regSpliteSegment = regexp.MustCompile("[\r\n]")
|
|
|
|
- regFilterNumber = regexp.MustCompile("^[\\d一二三四五六七八九十]+")
|
|
|
|
- regSplit = regexp.MustCompile("或|和|以?及|与|、|或")
|
|
|
|
- regStartWrap = regexp.MustCompile("^[\r\n]")
|
|
|
|
- regEndWrap = regexp.MustCompile("[\r\n]$")
|
|
|
|
- regMoreWrap = regexp.MustCompile("[\r\n]{2,}")
|
|
|
|
- regStrWrap = regexp.MustCompile("分包名称[::]")
|
|
|
|
- regBZJWarap = regexp.MustCompile("(保证金.*|每包[0-9]*元|标志|一包一投|上包|标线|国标|第[\\d一二三四五六七八九十]标室)")
|
|
|
|
- regFJWarap = regexp.MustCompile("[a-zA-Z0-9](包|标段)[公告附件]*.(pdf|PDF|docx|doc|DOCX|DOC)")
|
|
|
|
- regAZWarap = regexp.MustCompile("(标[a-zA-Z]取值|标段划分|标液|分包个数|物资[\\d一二三四五六七八九十]?包)")
|
|
|
|
- replSerial = regexp.MustCompile("(\r\n|^)([\\d一二三四五六七八九十][、..::,])+\\d")
|
|
|
|
- moreColonReg = regexp.MustCompile("[::]+")
|
|
|
|
- regFilter = regexp.MustCompile("等$")
|
|
|
|
- pkgFilter = regexp.MustCompile("第[一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ](子|合同|分|施工|监理)?(标段?|包|合同段|标包)|(子|合同|分|施工|监理)?(标|包)(段|号)?")
|
|
|
|
- indexTile = regexp.MustCompile("[0-9.]{2,3}[\\s\u4e00-\u9fa5]{2,8}[::]+") //小标题
|
|
|
|
- indexTile2 = regexp.MustCompile("[\\s\u4e00-\u9fa5]{2,8}")
|
|
|
|
- regReplAllSpace2 = regexp.MustCompile("[\u3000\u2003\u00a0\\s0-9.::、\\(\\)]+")
|
|
|
|
- confusion = map[string]string{
|
|
|
|
|
|
+ regFilterTitle = regexp.MustCompile("[(\\(<《【\\[{{〔].+?[)\\)>》】\\]}}〕]")
|
|
|
|
+ regDivision = regexp.MustCompile("[::]")
|
|
|
|
+ regSpliteSegment = regexp.MustCompile("[\r\n]")
|
|
|
|
+ regFilterNumber = regexp.MustCompile("^[\\d一二三四五六七八九十]+")
|
|
|
|
+ regSplit = regexp.MustCompile("或|和|以?及|与|、|或")
|
|
|
|
+ regStartWrap = regexp.MustCompile("^[\r\n]")
|
|
|
|
+ regEndWrap = regexp.MustCompile("[\r\n]$")
|
|
|
|
+ regMoreWrap = regexp.MustCompile("[\r\n]{2,}")
|
|
|
|
+ regStrWrap = regexp.MustCompile("分包名称[::]")
|
|
|
|
+ regBZJWarap = regexp.MustCompile("(保证金.*|每包[0-9]*元|标志|一包一投|上包|标线|国标|第[\\d一二三四五六七八九十]标室)")
|
|
|
|
+ regFJWarap = regexp.MustCompile("[a-zA-Z0-9](包|标段)[公告附件]*.(pdf|PDF|docx|doc|DOCX|DOC)")
|
|
|
|
+ regAZWarap = regexp.MustCompile("(标[a-zA-Z]取值|标段划分|标液|分包个数|物资[\\d一二三四五六七八九十]?包)")
|
|
|
|
+ replSerial = regexp.MustCompile("(\r\n|^)([\\d一二三四五六七八九十][、..::,])+\\d")
|
|
|
|
+ moreColonReg = regexp.MustCompile("[::]+")
|
|
|
|
+ regFilter = regexp.MustCompile("等$")
|
|
|
|
+ pkgFilter = regexp.MustCompile("第[一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ](子|合同|分|施工|监理)?(标段?|包|合同段|标包)|(子|合同|分|施工|监理)?(标|包)(段|号)+")
|
|
|
|
+ indexTile = regexp.MustCompile("[0-9.]{2,3}[\\s\u4e00-\u9fa5]{2,8}[::]+") //小标题
|
|
|
|
+ indexTile2 = regexp.MustCompile("[\\s\u4e00-\u9fa5]{2,8}")
|
|
|
|
+ regReplAllSpace2 = regexp.MustCompile("[\u3000\u2003\u00a0\\s0-9.::、\\(\\)]+")
|
|
|
|
+ confusion = map[string]string{
|
|
"参与": "canyu",
|
|
"参与": "canyu",
|
|
}
|
|
}
|
|
//查找分包之前,先对内容进行预处理
|
|
//查找分包之前,先对内容进行预处理
|
|
@@ -792,7 +792,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
|
|
tagtitle = regReplAllSpace2.ReplaceAllString(tagtitle, "")
|
|
tagtitle = regReplAllSpace2.ReplaceAllString(tagtitle, "")
|
|
if tagtitle == "" {
|
|
if tagtitle == "" {
|
|
tagtitle = title
|
|
tagtitle = title
|
|
- } else if strings.Contains(tagtitle, bv[0]) && title != "" {
|
|
|
|
|
|
+ } else if strings.Contains(tagtitle, bv[0]) && title != "" {
|
|
tagtitle = title
|
|
tagtitle = title
|
|
}
|
|
}
|
|
text = tagtitle + ":" + text
|
|
text = tagtitle + ":" + text
|