|
@@ -11,6 +11,7 @@ import (
|
|
|
"regexp"
|
|
|
"strings"
|
|
|
"sync"
|
|
|
+ "unicode/utf8"
|
|
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
)
|
|
@@ -288,7 +289,7 @@ func (td *TD) tdHasTable(bsontable *bool, tr *TR, isSite bool, codeSite string)
|
|
|
|
|
|
// tdIsHb decides whether the td cell value is a table header and splits the td content into blocks according to its length.
|
|
|
func (td *TD) tdIsHb(tr *TR, table *Table, bsontable, isSite bool, codeSite string) {
|
|
|
- lenval := len([]rune(td.Val)) //经过处理的td内容长度
|
|
|
+ lenval := utf8.RuneCountInString(td.Val)//经过处理的td内容长度
|
|
|
//if lentxt > 9 {
|
|
|
//td.KV = GetKVAll(txt, "")
|
|
|
ub := []*u.Block{}
|
|
@@ -385,7 +386,7 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable, isSite bool, codeSite stri
|
|
|
if !bsontable {
|
|
|
txt := repSpace.ReplaceAllString(td.Val, "")
|
|
|
btw, must, _, _, repl := CheckHeader(txt)
|
|
|
- if lenval > 15 && !strings.Contains(txt, "采购代理机构名称、地址和联系方式") {
|
|
|
+ if lenval > 18 {
|
|
|
btw = false
|
|
|
}
|
|
|
if strings.Contains(td.Val, "个项目") || strings.Contains(td.Val, "奥图码") {
|