|
@@ -253,6 +253,7 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock, isSite
|
|
|
title = filterTitle(title)
|
|
|
//分割标题 [和及]。。。 参与
|
|
|
splitTitles := ProcTitle(title)
|
|
|
+ blockText = mergetext(splitTitles, blockText)
|
|
|
block := &util.Block{
|
|
|
Index: index, //序号
|
|
|
Text: blockText, //内容
|
|
@@ -319,6 +320,32 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock, isSite
|
|
|
return returnBlocks, returnValue
|
|
|
}
|
|
|
|
|
|
+func mergetext(titles []string, text string) string {
|
|
|
+ if len(titles) == 0 || utf8.RuneCountInString(text) > 150 {
|
|
|
+ return text
|
|
|
+ }
|
|
|
+ splitLenstrs := strings.Split(text, "\n")
|
|
|
+ if len(splitLenstrs) == 1 || len(titles) != len(splitLenstrs)-1 {
|
|
|
+ return text
|
|
|
+ }
|
|
|
+ tt := ""
|
|
|
+ for i, v := range splitLenstrs[1:] {
|
|
|
+ lentexts := regDivision.Split(v, -1)
|
|
|
+ if len(lentexts) == 2 {
|
|
|
+ if strings.Contains(titles[i], lentexts[0]) {
|
|
|
+ tt += titles[i] + ":" + lentexts[1] + "\n"
|
|
|
+ }else if strings.Contains(titles[i], lentexts[0]) ||strings.Contains(titles[i], lentexts[0]){
|
|
|
+ tt += titles[i] + ":" + lentexts[1] + "\n"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if len(tt) == 0 {
|
|
|
+ return text
|
|
|
+ } else {
|
|
|
+ return tt
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
//块标题处理
|
|
|
func ProcTitle(title string) []string {
|
|
|
if title == "" {
|
|
@@ -345,6 +372,14 @@ func ProcTitle(title string) []string {
|
|
|
}
|
|
|
ara[kk] = start + vv
|
|
|
}
|
|
|
+ } else if vv == "联系人" || vv == "联系方式" {
|
|
|
+ if strings.Contains(prev, "代理") {
|
|
|
+ ara[kk] = "代理机构" + vv
|
|
|
+ } else if strings.Contains(prev, "中标") {
|
|
|
+ ara[kk] = "中标单位" + vv
|
|
|
+ } else if strings.Contains(prev, "采购") {
|
|
|
+ ara[kk] = "采购单位" + vv
|
|
|
+ }
|
|
|
}
|
|
|
if len([]rune(vv)) > 3 {
|
|
|
if direct == -1 {
|