|
@@ -133,7 +133,9 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1, k2 []str
|
|
|
if sv, sok := v.(string); sok { //取KV
|
|
|
v1 = sv
|
|
|
} else if sv, sok := v.([]string); sok { //是数组先默认取第一个
|
|
|
- v1 = sv[0]
|
|
|
+ if len(sv) >= 1 {
|
|
|
+ v1 = sv[0]
|
|
|
+ }
|
|
|
}
|
|
|
//对值单位的处理 (预算|费|价|额|规模|投资)
|
|
|
if moneyreg.MatchString(tk) {
|
|
@@ -228,6 +230,10 @@ func (table *Table) KVFilter() {
|
|
|
continue
|
|
|
}
|
|
|
v := table.SortKV.Map[k]
|
|
|
+ if table.SortKVWeight[k] == -99 { //td格式化kv降低权重
|
|
|
+ as.AddKey(k, v)
|
|
|
+ continue
|
|
|
+ }
|
|
|
if _, ok := v.(string); ok { //table.SortKV.Value为字符串,匹配抽取关键词table.SortKV.Key,匹配到添加k,v到table.StandKV,table.StandKVWeight
|
|
|
k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
|
|
|
k1, n_k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
|
|
@@ -257,7 +263,7 @@ func (table *Table) KVFilter() {
|
|
|
} else {
|
|
|
if table.StandKV[k] == "" && qutil.ObjToString(v) != "" {
|
|
|
table.StandKV[k] = qutil.ObjToString(v)
|
|
|
- table.StandKVWeight[k] = 0
|
|
|
+ table.StandKVWeight[k] = -99
|
|
|
}
|
|
|
}
|
|
|
for _, n_k2 := range n_k1 {
|
|
@@ -347,6 +353,9 @@ func (table *Table) KVFilter() {
|
|
|
func (table *Table) sortKVArr(as *SortMap, winnertag bool) {
|
|
|
checkKey := map[int]bool{}
|
|
|
for kn, k := range as.Keys { //遍历table.SortKV.value为数组的key
|
|
|
+ if strings.TrimSpace(table.StandKV[k]) != "" {
|
|
|
+ continue
|
|
|
+ }
|
|
|
v := as.Map[k]
|
|
|
if vm, ok := v.([]map[string]interface{}); ok && k == NullTxtBid {
|
|
|
if table.WinnerOrder == nil {
|
|
@@ -646,11 +655,11 @@ func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype
|
|
|
tabres = NewTableResult(_id, toptype, blockTag, con, itype, ruleBlock)
|
|
|
//可以有多个table
|
|
|
//for _, table := range tabs {
|
|
|
- //隐藏表格跳过
|
|
|
- if IsHide(tabs) {
|
|
|
- return
|
|
|
- }
|
|
|
- tabres.GoqueryTabs = tabs
|
|
|
+ //隐藏表格跳过
|
|
|
+ if IsHide(tabs) {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ tabres.GoqueryTabs = tabs
|
|
|
//}
|
|
|
//解析表格集
|
|
|
tabres.Analy()
|
|
@@ -665,16 +674,16 @@ func (ts *TableResult) Analy() {
|
|
|
MatchMap: map[string]map[string]bool{},
|
|
|
}
|
|
|
//for _, table := range ts.GoqueryTabs {
|
|
|
- tn := NewTable(ts.Html, ts, ts.GoqueryTabs)
|
|
|
- //核心模块
|
|
|
- tsw := tn.Analy(contactFormat)
|
|
|
- for _, tab := range tsw {
|
|
|
- if len(tab.TRs) > 0 {
|
|
|
- tabs = append(tabs, tab)
|
|
|
- }
|
|
|
- //fmt.Println("tab.SortKV.Map", tab.SortKV.Keys)
|
|
|
+ tn := NewTable(ts.Html, ts, ts.GoqueryTabs)
|
|
|
+ //核心模块
|
|
|
+ tsw := tn.Analy(contactFormat)
|
|
|
+ for _, tab := range tsw {
|
|
|
+ if len(tab.TRs) > 0 {
|
|
|
+ tabs = append(tabs, tab)
|
|
|
}
|
|
|
- //tn.SonTables = append(tn.SonTables, tn)
|
|
|
+ //fmt.Println("tab.SortKV.Map", tab.SortKV.Keys)
|
|
|
+ }
|
|
|
+ //tn.SonTables = append(tn.SonTables, tn)
|
|
|
//}
|
|
|
//统一合并,考虑统一多表格是多包的情况---新增
|
|
|
if len(tabs) > 1 {
|
|
@@ -789,7 +798,7 @@ func (table *Table) createTabe(trs *goquery.Selection) {
|
|
|
td := NewTD(selm, TR, table) //初始化td,kv处理,td中有table处理,td的方向
|
|
|
//num++
|
|
|
TR.AddTD(td)
|
|
|
- if td.Val == "" && td.SonTableResult == nil && len(td.SortKV.Map) == 0{ //删除一个tr,tr中所有td是空值的
|
|
|
+ if td.Val == "" && td.SonTableResult == nil && len(td.SortKV.Map) == 0 { //删除一个tr,tr中所有td是空值的
|
|
|
empty++
|
|
|
if tds.Size() == empty {
|
|
|
tdTextIsNull = true
|
|
@@ -1479,6 +1488,7 @@ func (table *Table) FindKV() {
|
|
|
continue
|
|
|
}
|
|
|
table.SortKV.AddKey(tdk, tdv)
|
|
|
+ table.SortKVWeight[tdk] = -99
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -3185,7 +3195,7 @@ func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMa
|
|
|
for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
|
|
|
val := table.SortKV.Map[key]
|
|
|
key = regReplAllSpace.ReplaceAllString(key, "")
|
|
|
- key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
|
|
|
+ key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
|
|
|
if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
|
|
|
/*
|
|
|
{
|