|
@@ -125,8 +125,8 @@ func IsHide(g *goquery.Selection) (b bool) {
|
|
|
|
|
|
//对表格的key进行标准化处理,多个k相同时,出现覆盖问题
|
|
|
//待扩展,暂不支持正则标签库
|
|
|
-func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1 []string, weight []int, v1, returntag string, b bool) {
|
|
|
- k1 = []string{}
|
|
|
+func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1, k2 []string, weight []int, v1, returntag string, b bool) {
|
|
|
+ k1, k2 = []string{}, []string{}
|
|
|
weight = []int{}
|
|
|
tk := k
|
|
|
if sv, sok := v.(string); sok { //取KV
|
|
@@ -177,6 +177,8 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1 []string,
|
|
|
returntag = "中标情况"
|
|
|
}
|
|
|
b = true
|
|
|
+ } else {
|
|
|
+ k2 = append(k2, k)
|
|
|
}
|
|
|
}
|
|
|
//对上一步没有取到标准化key的进一步处理
|
|
@@ -227,7 +229,7 @@ func (table *Table) KVFilter() {
|
|
|
v := table.SortKV.Map[k]
|
|
|
if _, ok := v.(string); ok { //table.SortKV.Value为字符串,匹配抽取关键词table.SortKV.Key,匹配到添加k,v到table.StandKV,table.StandKVWeight
|
|
|
k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
|
|
|
- k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
|
|
|
+ k1, n_k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
|
|
|
//qutil.Debug(k, v, k1, w1, v1, tag, b)
|
|
|
if b {
|
|
|
//降低冒号值的权重
|
|
@@ -257,6 +259,9 @@ func (table *Table) KVFilter() {
|
|
|
table.StandKVWeight[k] = 0
|
|
|
}
|
|
|
}
|
|
|
+ for _, n_k2 := range n_k1 {
|
|
|
+ table.SortKV.NotTagKey[n_k2] = true
|
|
|
+ }
|
|
|
} else {
|
|
|
//u.Debug(k, v, "---------")
|
|
|
as.AddKey(k, v)
|
|
@@ -473,7 +478,7 @@ func (table *Table) sortKVArr(as *SortMap, winnertag bool) {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v)
|
|
|
+ k1, n_k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v)
|
|
|
if b {
|
|
|
if tag != "" && table.Tag == "" {
|
|
|
table.Tag = tag
|
|
@@ -489,6 +494,10 @@ func (table *Table) sortKVArr(as *SortMap, winnertag bool) {
|
|
|
// }
|
|
|
// }
|
|
|
}
|
|
|
+ } else {
|
|
|
+ for _, n_k2 := range n_k1 {
|
|
|
+ table.SortKV.NotTagKey[n_k2] = true
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -659,7 +668,7 @@ func (ts *TableResult) Analy() {
|
|
|
//核心模块
|
|
|
ts := tn.Analy(contactFormat)
|
|
|
for _, tab := range ts {
|
|
|
- if len(tab.TRs) > 0{
|
|
|
+ if len(tab.TRs) > 0 {
|
|
|
tabs = append(tabs, tab)
|
|
|
}
|
|
|
//fmt.Println("tab.SortKV.Map", tab.SortKV.Keys)
|
|
@@ -845,6 +854,9 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
|
|
|
table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
|
|
|
}
|
|
|
}
|
|
|
+ for k, v := range table.SortKV.NotTagKey {
|
|
|
+ table.TableResult.SortKV.NotTagKey[k] = v
|
|
|
+ }
|
|
|
//u.Debug(str)
|
|
|
}
|
|
|
}
|
|
@@ -2079,7 +2091,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int)
|
|
|
}
|
|
|
} else if val, bvs := v1.(string); bvs && len(index) == 1 {
|
|
|
//删除子包的kv
|
|
|
- k1tags, _, _, _, _ := CommonDataAnaly(k1, "", "", val)
|
|
|
+ k1tags, _, _, _, _, _ := CommonDataAnaly(k1, "", "", val)
|
|
|
if len(k1tags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(k1tags[0]) { //(k1tags[0].Value == "采购单位" || k1tags[0].Value == "项目编号")) {
|
|
|
//log.Println("remove", k1, val)
|
|
|
tn.SortKV.RemoveKey(k1)
|
|
@@ -2301,7 +2313,7 @@ func (tn *Table) assemblePackage(k1, v1, key string) {
|
|
|
bp.TableKV = u.NewJobKv()
|
|
|
}
|
|
|
if v1 != "" {
|
|
|
- k2, w1, v2, _, bf := CommonDataAnaly(k1, "中标情况", "", v1) //匹配抽取关键词
|
|
|
+ k2, _, w1, v2, _, bf := CommonDataAnaly(k1, "中标情况", "", v1) //匹配抽取关键词
|
|
|
if bf {
|
|
|
for pos, k3 := range k2 {
|
|
|
if bp.TableKV.Kv != nil && bp.TableKV.KvTag[k3] != nil && (bp.TableKV.Kv[k3] == "" || w1[pos] > bp.TableKV.KvTag[k3].Weight) {
|
|
@@ -3169,7 +3181,7 @@ func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMa
|
|
|
for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
|
|
|
val := table.SortKV.Map[key]
|
|
|
key = regReplAllSpace.ReplaceAllString(key, "")
|
|
|
- key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
|
|
|
+ key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
|
|
|
if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
|
|
|
/*
|
|
|
{
|