|
@@ -3,6 +3,7 @@ package pretreated
|
|
|
//定义表格对象
|
|
|
|
|
|
import (
|
|
|
+ "encoding/json"
|
|
|
"fmt"
|
|
|
u "jy/util"
|
|
|
"log"
|
|
@@ -146,16 +147,6 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- }else {
|
|
|
- //调用kv解析
|
|
|
- cKV := GetKVAll(txt, "", nil, 1)
|
|
|
- for k,v :=range cKV.Kv{
|
|
|
- td.SortKV.AddKey(k,v)
|
|
|
- }
|
|
|
- sKV := SspacekvEntity.Entrance(txt, "", nil)
|
|
|
- for k,v :=range sKV.Kv{
|
|
|
- td.SortKV.AddKey(k,v)
|
|
|
- }
|
|
|
}
|
|
|
} else {
|
|
|
txt = strings.TrimSpace(td.Goquery.Text())
|
|
@@ -163,6 +154,33 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
|
|
|
text := dwReg.ReplaceAllString(u.TrimLRAll(txt, ""), "$1")
|
|
|
td.Val = text //值
|
|
|
td.Text = txt //原始串
|
|
|
+ //调用kv解析
|
|
|
+ cKV := GetKVAll(text, "", nil, 1)
|
|
|
+ for k,v :=range cKV.Kv{
|
|
|
+ td.SortKV.AddKey(k,v)
|
|
|
+ }
|
|
|
+ sKV := SspacekvEntity.Entrance(text, "", nil)
|
|
|
+ for k,v :=range sKV.Kv{
|
|
|
+ td.SortKV.AddKey(k,v)
|
|
|
+ }
|
|
|
+ proCode := projectcodeReg.FindString(text)
|
|
|
+ if proCode != "" {
|
|
|
+ ckv := GetKVAll(proCode, "", nil, 1)
|
|
|
+ for k,v :=range ckv.Kv{
|
|
|
+ td.SortKV.AddKey(k,v)
|
|
|
+ }
|
|
|
+ }else if proCode = projectcodeReg2.FindString(text);proCode !=""{
|
|
|
+ ckv := GetKVAll(proCode, "", nil, 1)
|
|
|
+ for k,v :=range ckv.Kv{
|
|
|
+ td.SortKV.AddKey(k,v)
|
|
|
+ }
|
|
|
+ }else if proCode = jsonReg.FindString(text);proCode != ""{
|
|
|
+ jsonMap := make(map[string]string)
|
|
|
+ json.Unmarshal([]byte(proCode),&jsonMap)
|
|
|
+ for k,v := range jsonMap{
|
|
|
+ td.SortKV.AddKey(k,v)
|
|
|
+ }
|
|
|
+ }
|
|
|
//对td单元格值判断是否是表头和根据td内容长度进行分块处理
|
|
|
td.tdIsHb(tr, table, bsontable)
|
|
|
bhead := false
|
|
@@ -859,7 +877,11 @@ func ComputeConRatio(con string, strtype int) (tabs []*goquery.Selection, ratio
|
|
|
**/
|
|
|
return
|
|
|
}
|
|
|
-
|
|
|
+//纯文本
|
|
|
+func HtmlToText(con string) string {
|
|
|
+ doc2, _ := goquery.NewDocumentFromReader(strings.NewReader(con))
|
|
|
+ return doc2.Text()
|
|
|
+}
|
|
|
//取出排除表格之外的文本
|
|
|
func TextAfterRemoveTable(con string) string {
|
|
|
doc2, _ := goquery.NewDocumentFromReader(strings.NewReader(con))
|