|
@@ -63,7 +63,6 @@ type TD struct {
|
|
|
Val string //值
|
|
|
Text string //原始串
|
|
|
SortKV *SortMap //存放kv值
|
|
|
- SortKVWeight map[string]int //存放kv值权重
|
|
|
Html string //html值
|
|
|
BH bool //是否是表头
|
|
|
MustBH bool //不能修改的表头
|
|
@@ -91,12 +90,11 @@ var dwReg = regexp.MustCompile("单位[::/ \\s\u3000\u2003\u00a0\\n]*([万亿
|
|
|
func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
|
|
|
defer qutil.Catch()
|
|
|
td := &TD{
|
|
|
- ArrVal: []string{},
|
|
|
- Goquery: Goquery,
|
|
|
- SonTds: []*TD{},
|
|
|
- TR: tr,
|
|
|
- SortKV: NewSortMap(),
|
|
|
- SortKVWeight: map[string]int{},
|
|
|
+ ArrVal: []string{},
|
|
|
+ Goquery: Goquery,
|
|
|
+ SonTds: []*TD{},
|
|
|
+ TR: tr,
|
|
|
+ SortKV: NewSortMap(),
|
|
|
}
|
|
|
colspan, rowspan := 0, 0
|
|
|
col, bcol := td.Goquery.Attr("colspan")
|
|
@@ -145,20 +143,28 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
|
|
|
td.SortKV.AddKey(bl_sk, bl_sv)
|
|
|
}
|
|
|
}
|
|
|
+ } else {
|
|
|
+ //for _, v := range GetKVAll(txt, "", nil, 2).KvTags {
|
|
|
+ //for _, vv := range v {
|
|
|
+ //td.SortKV.AddKey(vv.Key, vv.Value)
|
|
|
+ //}
|
|
|
+ //}
|
|
|
}
|
|
|
//抽取不到走正则抽
|
|
|
proCode := projectcodeReg.FindString(text)
|
|
|
if proCode != "" {
|
|
|
ckv := GetKVAll(proCode, "", nil, 1)
|
|
|
- for k, v := range ckv.KvTags {
|
|
|
- td.SortKV.AddKey(k, v)
|
|
|
- td.SortKVWeight[k] = -99
|
|
|
+ for _, v := range ckv.KvTags {
|
|
|
+ for _, vv := range v {
|
|
|
+ td.SortKV.AddKey(vv.Key, vv.Value)
|
|
|
+ }
|
|
|
}
|
|
|
} else if proCode = projectcodeReg2.FindString(text); proCode != "" {
|
|
|
ckv := GetKVAll(proCode, "", nil, 1)
|
|
|
- for k, v := range ckv.KvTags {
|
|
|
- td.SortKV.AddKey(k, v)
|
|
|
- td.SortKVWeight[k] = -99
|
|
|
+ for _, v := range ckv.KvTags {
|
|
|
+ for _, vv := range v {
|
|
|
+ td.SortKV.AddKey(vv.Key, vv.Value)
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
if proCode = jsonReg.FindString(text); proCode != "" {
|
|
@@ -166,7 +172,6 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
|
|
|
json.Unmarshal([]byte(proCode), &jsonMap)
|
|
|
for k, v := range jsonMap {
|
|
|
td.SortKV.AddKey(k, v)
|
|
|
- td.SortKVWeight[k] = -99
|
|
|
}
|
|
|
}
|
|
|
//对td单元格值判断是否是表头和根据td内容长度进行分块处理
|