|
@@ -3,7 +3,6 @@ package pretreated
|
|
|
import (
|
|
|
"fmt"
|
|
|
u "jy/util"
|
|
|
- "log"
|
|
|
qutil "qfw/util"
|
|
|
"regexp"
|
|
|
"strings"
|
|
@@ -126,27 +125,27 @@ func IsHide(g *goquery.Selection) (b bool) {
|
|
|
|
|
|
//对表格的key进行标准化处理,多个k相同时,出现覆盖问题
|
|
|
//待扩展,暂不支持正则标签库
|
|
|
-func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1, k2 []string, weight []int, v1, returntag string, b bool) {
|
|
|
- k1, k2 = []string{}, []string{}
|
|
|
- weight = []int{}
|
|
|
- tk := k
|
|
|
+func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (kvTags map[string][]*u.Tag, returntag string) {
|
|
|
+ kvTags = map[string][]*u.Tag{}
|
|
|
+ v1 := ""
|
|
|
if sv, sok := v.(string); sok { //取KV
|
|
|
v1 = sv
|
|
|
} else if sv, sok := v.([]string); sok { //是数组先默认取第一个
|
|
|
v1 = sv[0]
|
|
|
}
|
|
|
//对值单位的处理 (预算|费|价|额|规模|投资)
|
|
|
- if moneyreg.MatchString(tk) {
|
|
|
- v1 += GetMoneyUnit(tk, v1)
|
|
|
+ if moneyreg.MatchString(k) {
|
|
|
+ v1 += GetMoneyUnit(k, v1)
|
|
|
}
|
|
|
//先清理key
|
|
|
//u.Debug(1, k, v1)
|
|
|
- k = ClearKey(k, 2)
|
|
|
+ k1 := ClearKey(k, 2)
|
|
|
//u.Debug(2, k)
|
|
|
//取标准key
|
|
|
- res := u.GetTags(k)
|
|
|
- if len(res) == 0 && tk != k {
|
|
|
- res = u.GetTags(tk)
|
|
|
+ res := u.GetTags(k1)
|
|
|
+ if len(res) == 0 && k1 != k {
|
|
|
+ res = u.GetTags(k)
|
|
|
+ k1 = k
|
|
|
}
|
|
|
//log.Println(k, res)
|
|
|
// if len(res) == 0 {
|
|
@@ -154,50 +153,44 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1, k2 []str
|
|
|
// }
|
|
|
//当取到标准化值时,放入数组
|
|
|
if len(res) > 0 {
|
|
|
- b = true
|
|
|
for _, t1 := range res {
|
|
|
- k1 = append(k1, t1.Value)
|
|
|
- weight = append(weight, t1.Weight)
|
|
|
+ //降低冒号值的权重
|
|
|
+ if MhSpilt.MatchString(v1) {
|
|
|
+ t1.Weight -= 50
|
|
|
+ }
|
|
|
+ kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight})
|
|
|
}
|
|
|
//k1 = res[0].Value
|
|
|
- }
|
|
|
- //没有取到标准化key时,对中标金额和中标单位的逻辑处理
|
|
|
- if !b {
|
|
|
+ } else {
|
|
|
+ kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true})
|
|
|
+ //没有取到标准化key时,对中标金额和中标单位的逻辑处理
|
|
|
if filter_zbje_k.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
|
|
|
if tabletag == "" {
|
|
|
returntag = "中标情况"
|
|
|
}
|
|
|
- k1 = append(k1, "中标金额")
|
|
|
- weight = append(weight, -100)
|
|
|
- b = true
|
|
|
+ kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: "中标金额", Value: v1, Weight: -100})
|
|
|
} else if filter_zbdw_ky.MatchString(k) && !filter_zbdw_kn.MatchString(k) &&
|
|
|
filter_zbdw_v.MatchString(v1) {
|
|
|
- k1 = append(k1, "中标单位")
|
|
|
- weight = append(weight, -100)
|
|
|
+ kvTags["中标单位"] = append(kvTags["中标单位"], &u.Tag{Key: "中标单位", Value: v1, Weight: -100})
|
|
|
if tabletag == "" {
|
|
|
returntag = "中标情况"
|
|
|
}
|
|
|
- b = true
|
|
|
} else {
|
|
|
- k2 = append(k2, k)
|
|
|
- }
|
|
|
- }
|
|
|
- //对上一步没有取到标准化key的进一步处理
|
|
|
- if !b {
|
|
|
- if tabletag == "" {
|
|
|
+ //对上一步没有取到标准化key的进一步处理
|
|
|
+ if tabletag == "" {
|
|
|
|
|
|
- }
|
|
|
- if filter_tag_zb.MatchString(tabletag) || filter_tag_zb.MatchString(tabledesc) {
|
|
|
- //u.Debug(v1, k, "-----", filter_zbdw_jd.MatchString(k), filter_zbdw_v.MatchString(v1))
|
|
|
- if filter_zbje_jd.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
|
|
|
- k1 = append(k1, "中标金额")
|
|
|
- weight = append(weight, -100)
|
|
|
- b = true
|
|
|
- } /*else if filter_zbdw_jd.MatchString(k) && filter_zbdw_v.MatchString(v1) {
|
|
|
- k1 = append(k1, "中标单位")
|
|
|
- weight = append(weight, -100)
|
|
|
- b = true
|
|
|
- }*/
|
|
|
+ }
|
|
|
+ if filter_tag_zb.MatchString(tabletag) || filter_tag_zb.MatchString(tabledesc) {
|
|
|
+ //u.Debug(v1, k, "-----", filter_zbdw_jd.MatchString(k), filter_zbdw_v.MatchString(v1))
|
|
|
+ if filter_zbje_jd.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
|
|
|
+ kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: "中标金额", Value: v1, Weight: -100})
|
|
|
+
|
|
|
+ } /*else if filter_zbdw_jd.MatchString(k) && filter_zbdw_v.MatchString(v1) {
|
|
|
+ k1 = append(k1, "中标单位")
|
|
|
+ weight = append(weight, -100)
|
|
|
+ b = true
|
|
|
+ }*/
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
return
|
|
@@ -230,38 +223,19 @@ func (table *Table) KVFilter() {
|
|
|
v := table.SortKV.Map[k]
|
|
|
if _, ok := v.(string); ok { //table.SortKV.Value为字符串,匹配抽取关键词table.SortKV.Key,匹配到添加k,v到table.StandKV,table.StandKVWeight
|
|
|
k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
|
|
|
- k1, n_k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
|
|
|
+ kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
|
|
|
//qutil.Debug(k, v, k1, w1, v1, tag, b)
|
|
|
- if b {
|
|
|
- //降低冒号值的权重
|
|
|
- if MhSpilt.MatchString(v1) {
|
|
|
- for pos, _ := range k1 {
|
|
|
- w1[pos] -= 50
|
|
|
- }
|
|
|
- }
|
|
|
- if tag != "" && table.Tag == "" {
|
|
|
- table.Tag = tag
|
|
|
- }
|
|
|
- for pos, k2 := range k1 { //根据关键词,过滤table.SortKV到table.StandKV和table.StandKVWeight
|
|
|
- if table.StandKV[k2] == "" || w1[pos] > table.StandKVWeight[k2] {
|
|
|
- table.StandKV[k2] = v1 //本节点
|
|
|
- table.StandKVWeight[k2] = w1[pos]
|
|
|
- }
|
|
|
- // else if k2 == "中标金额" {
|
|
|
- // // u.Debug(qutil.Float64All(v1), qutil.Float64All(table.StandKV[k2]))
|
|
|
- // if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
|
|
|
- // table.StandKV[k2] = v1
|
|
|
- // }
|
|
|
- // }
|
|
|
- }
|
|
|
- } else {
|
|
|
- if table.StandKV[k] == "" && qutil.ObjToString(v) != "" {
|
|
|
- table.StandKV[k] = qutil.ObjToString(v)
|
|
|
- table.StandKVWeight[k] = 0
|
|
|
- }
|
|
|
- }
|
|
|
- for _, n_k2 := range n_k1 {
|
|
|
- table.SortKV.NotTagKey[n_k2] = true
|
|
|
+ if tag != "" && table.Tag == "" {
|
|
|
+ table.Tag = tag
|
|
|
+ }
|
|
|
+ for kk, vv := range kvTags { //根据关键词,过滤table.SortKV到table.StandKV和table.StandKVWeight
|
|
|
+ table.StandKV[kk] = append(table.StandKV[kk], vv...)
|
|
|
+ // else if k2 == "中标金额" {
|
|
|
+ // // u.Debug(qutil.Float64All(v1), qutil.Float64All(table.StandKV[k2]))
|
|
|
+ // if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
|
|
|
+ // table.StandKV[k2] = v1
|
|
|
+ // }
|
|
|
+ // }
|
|
|
}
|
|
|
} else {
|
|
|
//u.Debug(k, v, "---------")
|
|
@@ -315,11 +289,10 @@ func (table *Table) KVFilter() {
|
|
|
if len(table.WinnerOrder) > 0 {
|
|
|
//中标候选人合并
|
|
|
winnerOrderEntity.Merge(table.WinnerOrder, winnerOrder)
|
|
|
- if table.StandKV["中标单位"] == "" {
|
|
|
+ if len(table.StandKV["中标单位"]) == 0 {
|
|
|
ent := table.WinnerOrder[0]["entname"]
|
|
|
if ent != nil {
|
|
|
- table.StandKV["中标单位"], _ = ent.(string)
|
|
|
- table.StandKVWeight["中标单位"] = -25
|
|
|
+ table.StandKV["中标单位"] = append(table.StandKV["中标单位"], &u.Tag{Key: "中标单位", Value: qutil.ObjToString(ent), Weight: -25})
|
|
|
}
|
|
|
}
|
|
|
} else if !table.BPackage { //没有table.WinnerOrder也没有分包 将td中的WinnerOrder赋值给table.WinnerOrder
|
|
@@ -479,26 +452,17 @@ func (table *Table) sortKVArr(as *SortMap, winnertag bool) {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- k1, n_k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v)
|
|
|
- if b {
|
|
|
- if tag != "" && table.Tag == "" {
|
|
|
- table.Tag = tag
|
|
|
- }
|
|
|
- for pos, k2 := range k1 {
|
|
|
- if table.StandKV[k2] == "" || w1[pos] > table.StandKVWeight[k2] {
|
|
|
- table.StandKV[k2] = v1 //本节点
|
|
|
- table.StandKVWeight[k2] = w1[pos]
|
|
|
- }
|
|
|
- // else if k2 == "中标金额" {
|
|
|
- // if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
|
|
|
- // table.StandKV[k2] = v1
|
|
|
- // }
|
|
|
- // }
|
|
|
- }
|
|
|
- } else {
|
|
|
- for _, n_k2 := range n_k1 {
|
|
|
- table.SortKV.NotTagKey[n_k2] = true
|
|
|
- }
|
|
|
+ kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v)
|
|
|
+ if tag != "" && table.Tag == "" {
|
|
|
+ table.Tag = tag
|
|
|
+ }
|
|
|
+ for kk, vv := range kvTags {
|
|
|
+ table.StandKV[kk] = append(table.StandKV[kk], vv...)
|
|
|
+ // else if k2 == "中标金额" {
|
|
|
+ // if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
|
|
|
+ // table.StandKV[k2] = v1
|
|
|
+ // }
|
|
|
+ // }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -538,11 +502,8 @@ func (table *Table) analyTdKV() {
|
|
|
//u.Debug(td.BH, td.Val, td.SonTableResult)
|
|
|
if td.SonTableResult != nil {
|
|
|
//u.Debug(td.SonTableResult.SortKV.Map, "-------", td.SonTableResult.Tabs)
|
|
|
- for _, k3 := range td.SonTableResult.SortKV.Keys {
|
|
|
- if table.StandKV[k3] == "" || td.SonTableResult.SortKVWeight[k3] > table.StandKVWeight[k3] {
|
|
|
- table.StandKV[k3] = qutil.ObjToString(td.SonTableResult.SortKV.Map[k3])
|
|
|
- table.StandKVWeight[k3] = td.SonTableResult.SortKVWeight[k3]
|
|
|
- }
|
|
|
+ for k3, v3 := range td.SonTableResult.KvTags {
|
|
|
+ table.StandKV[k3] = append(table.StandKV[k3], v3...)
|
|
|
}
|
|
|
//中标候选人排序
|
|
|
if table.WinnerOrder == nil || len(table.WinnerOrder) == 0 {
|
|
@@ -570,15 +531,23 @@ func (table *Table) MergerToTableresult() {
|
|
|
bp.TableKV = u.NewJobKv()
|
|
|
}
|
|
|
v1 := v.(*u.BlockPackage)
|
|
|
- if v1.TableKV != nil && v1.TableKV.Kv != nil {
|
|
|
- for k2, v2 := range v1.TableKV.Kv {
|
|
|
+ if v1.TableKV != nil && len(v1.TableKV.KvTags) > 0 {
|
|
|
+ for k2, v2 := range v1.TableKV.KvTags {
|
|
|
if bp.TableKV == nil {
|
|
|
bp.TableKV = u.NewJobKv()
|
|
|
}
|
|
|
- if bp.TableKV.Kv[k2] == "" || (v1.TableKV.KvTag[k2] != nil && bp.TableKV.KvTag[k2] != nil && v1.TableKV.KvTag[k2].Weight > bp.TableKV.KvTag[k2].Weight) {
|
|
|
- //可能会报错 assignment to entry in nil map
|
|
|
- bp.TableKV.Kv[k2] = v2
|
|
|
- bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
|
|
|
+ isExists := false
|
|
|
+ for _, v2v := range v2 {
|
|
|
+ for _, v2vv := range bp.TableKV.KvTags[k2] {
|
|
|
+ if v2v.Value == v2vv.Value {
|
|
|
+ isExists = true
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if !isExists {
|
|
|
+ bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v)
|
|
|
+ bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -594,25 +563,12 @@ func (table *Table) MergerToTableresult() {
|
|
|
// u.Debug(table, table.TableResult, str)
|
|
|
}
|
|
|
//遍历标准key到tableresult.sortkv中
|
|
|
- for k, v := range table.StandKV {
|
|
|
- if table.TableResult.SortKV.Map[k] == nil || table.StandKVWeight[k] > table.TableResult.SortKVWeight[k] || strings.Contains(table.Tag, "变更") {
|
|
|
- v = strings.Replace(v, "__", "", -1)
|
|
|
- if table.TableResult.SortKV.Map[k] == nil {
|
|
|
- table.TableResult.SortKV.AddKey(k, v) //父集
|
|
|
- } else {
|
|
|
- if k == "项目编号" { //项目编号存在,又匹配到全为中文跳过
|
|
|
-
|
|
|
- if regHz.MatchString(v) {
|
|
|
- continue
|
|
|
- }
|
|
|
- }
|
|
|
- table.TableResult.SortKV.ReplaceKey(k, v, k)
|
|
|
- }
|
|
|
- table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
|
|
|
- } else if table.TableResult.SortKV.Map[k] != nil {
|
|
|
- //u.Debug(k, v, table.TableResult.SortKV.Map[k], "..............")
|
|
|
+ for _, v := range table.StandKV {
|
|
|
+ for _, vv := range v {
|
|
|
+ vv.Value = strings.Replace(vv.Value, "__", "", -1)
|
|
|
}
|
|
|
}
|
|
|
+ MergeKvTags(table.TableResult.KvTags, table.StandKV)
|
|
|
//表格的块标签
|
|
|
if table.TableResult.BlockTag == "" && table.Tag != "" {
|
|
|
table.TableResult.BlockTag = table.Tag
|
|
@@ -646,11 +602,11 @@ func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype
|
|
|
tabres = NewTableResult(_id, toptype, blockTag, con, itype, ruleBlock)
|
|
|
//可以有多个table
|
|
|
//for _, table := range tabs {
|
|
|
- //隐藏表格跳过
|
|
|
- if IsHide(tabs) {
|
|
|
- return
|
|
|
- }
|
|
|
- tabres.GoqueryTabs = tabs
|
|
|
+ //隐藏表格跳过
|
|
|
+ if IsHide(tabs) {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ tabres.GoqueryTabs = tabs
|
|
|
//}
|
|
|
//解析表格集
|
|
|
tabres.Analy()
|
|
@@ -665,26 +621,29 @@ func (ts *TableResult) Analy() {
|
|
|
MatchMap: map[string]map[string]bool{},
|
|
|
}
|
|
|
//for _, table := range ts.GoqueryTabs {
|
|
|
- tn := NewTable(ts.Html, ts, ts.GoqueryTabs)
|
|
|
- //核心模块
|
|
|
- tsw := tn.Analy(contactFormat)
|
|
|
- for _, tab := range tsw {
|
|
|
- if len(tab.TRs) > 0 {
|
|
|
- tabs = append(tabs, tab)
|
|
|
- }
|
|
|
- //fmt.Println("tab.SortKV.Map", tab.SortKV.Keys)
|
|
|
+ tn := NewTable(ts.Html, ts, ts.GoqueryTabs)
|
|
|
+ //核心模块
|
|
|
+ tsw := tn.Analy(contactFormat)
|
|
|
+ for _, tab := range tsw {
|
|
|
+ if len(tab.TRs) > 0 {
|
|
|
+ tabs = append(tabs, tab)
|
|
|
}
|
|
|
- //tn.SonTables = append(tn.SonTables, tn)
|
|
|
+ //fmt.Println("tab.SortKV.Map", tab.SortKV.Keys)
|
|
|
+ }
|
|
|
+ //tn.SonTables = append(tn.SonTables, tn)
|
|
|
//}
|
|
|
//统一合并,考虑统一多表格是多包的情况---新增
|
|
|
if len(tabs) > 1 {
|
|
|
pns := map[string]string{}
|
|
|
pnarr := []string{}
|
|
|
for _, table := range tabs {
|
|
|
- pn := table.StandKV["项目名称"]
|
|
|
- if pn != "" && TitleReg.MatchString(pn) {
|
|
|
- pnarr = append(pnarr, pn)
|
|
|
- matchres := TitleReg.FindAllStringSubmatch(pn, -1)
|
|
|
+ if len(table.StandKV["项目名称"]) == 0 {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ pn := table.StandKV["项目名称"][0]
|
|
|
+ if pn != nil && pn.Value != "" && TitleReg.MatchString(pn.Value) {
|
|
|
+ pnarr = append(pnarr, pn.Value)
|
|
|
+ matchres := TitleReg.FindAllStringSubmatch(pn.Value, -1)
|
|
|
if len(matchres) == 1 && len(matchres[0]) > 0 {
|
|
|
v1 := u.PackageNumberConvert(matchres[0][0])
|
|
|
pns[v1] = matchres[0][0]
|
|
@@ -693,7 +652,9 @@ func (ts *TableResult) Analy() {
|
|
|
bp.Origin = matchres[0][0]
|
|
|
bp.TableKV = u.NewJobKv()
|
|
|
for _, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
|
|
|
- bp.TableKV.Kv[k] = table.StandKV[k]
|
|
|
+ if len(table.StandKV[k]) > 0 {
|
|
|
+ bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value})
|
|
|
+ }
|
|
|
}
|
|
|
bp.WinnerOrder = table.WinnerOrder
|
|
|
if table.BlockPackage.Map[v1] == nil {
|
|
@@ -718,8 +679,7 @@ func (ts *TableResult) Analy() {
|
|
|
}
|
|
|
}
|
|
|
if btrue {
|
|
|
- ts.SortKV.AddKey("项目名称", pname)
|
|
|
- ts.SortKVWeight["项目名称"] = 100
|
|
|
+ ts.KvTags["项目名称"] = append(ts.KvTags["项目名称"], &u.Tag{Key: "项目名称", Value: pname, Weight: 100})
|
|
|
for _, table := range tabs {
|
|
|
table.BPackage = true
|
|
|
//预算、中标金额、NullTxtBid成交供应商排名 中标单位 成交状态
|
|
@@ -733,7 +693,9 @@ func (ts *TableResult) Analy() {
|
|
|
bp.TableKV = u.NewJobKv()
|
|
|
}
|
|
|
for nk, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
|
|
|
- bp.TableKV.Kv[k] = table.StandKV[k]
|
|
|
+ if len(table.StandKV[k]) > 0 {
|
|
|
+ bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value})
|
|
|
+ }
|
|
|
if nk < 4 {
|
|
|
delete(table.StandKV, k)
|
|
|
}
|
|
@@ -789,7 +751,7 @@ func (table *Table) createTabe(trs *goquery.Selection) {
|
|
|
td := NewTD(selm, TR, table) //初始化td,kv处理,td中有table处理,td的方向
|
|
|
//num++
|
|
|
TR.AddTD(td)
|
|
|
- if td.Val == "" && td.SonTableResult == nil && len(td.SortKV.Map) == 0{ //删除一个tr,tr中所有td是空值的
|
|
|
+ if td.Val == "" && td.SonTableResult == nil && len(td.SortKV.Map) == 0 { //删除一个tr,tr中所有td是空值的
|
|
|
empty++
|
|
|
if tds.Size() == empty {
|
|
|
tdTextIsNull = true
|
|
@@ -823,8 +785,7 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
|
|
|
if table.Tag != "" {
|
|
|
_, _, b := CheckMultiPackage(table.Tag, "")
|
|
|
if b {
|
|
|
- table.StandKV["项目名称"] = table.Tag
|
|
|
- table.StandKVWeight["项目名称"] = -100
|
|
|
+ table.StandKV["项目名称"] = append(table.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: table.Tag, Weight: -100})
|
|
|
}
|
|
|
}
|
|
|
table.TdContactFormat(contactFormat) //contactFormat,处理采购单位,代理机构
|
|
@@ -836,29 +797,12 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
|
|
|
}
|
|
|
//判断是否是多包,并处理分包的//遍历td分块
|
|
|
table.CheckMultiPackageByTable()
|
|
|
- //str := "\n"
|
|
|
- //for k, v := range table.StandKV {
|
|
|
- // str += fmt.Sprintf("_==___%s:%v\n", k, v)
|
|
|
- // if table.TableResult.SortKV.Map[k] == nil {
|
|
|
- // table.TableResult.SortKV.AddKey(k, v)
|
|
|
- // table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
|
|
|
- // }
|
|
|
- //}
|
|
|
res, _, _, _, _ := CheckCommon(table.Tag, "abandontable")
|
|
|
if !res {
|
|
|
//过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
|
|
|
table.KVFilter()
|
|
|
}
|
|
|
- for k, v := range table.StandKV { //过滤后的标准化kv
|
|
|
- if table.TableResult.SortKV.Map[k] == nil || table.StandKVWeight[k] > table.TableResult.SortKVWeight[k] {
|
|
|
- table.TableResult.SortKV.AddKey(k, v)
|
|
|
- table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
|
|
|
- }
|
|
|
- }
|
|
|
- for k, v := range table.SortKV.NotTagKey {
|
|
|
- table.TableResult.SortKV.NotTagKey[k] = v
|
|
|
- }
|
|
|
- //u.Debug(str)
|
|
|
+ //MergeKvTags(table.TableResult.KvTags, table.StandKV)
|
|
|
}
|
|
|
}
|
|
|
return ts
|
|
@@ -902,8 +846,7 @@ func (table *Table) tableSubDemolitionTable() []*Table {
|
|
|
tab1 = NewTable("", table.TableResult, table.Goquery)
|
|
|
tab1.BSplit = true
|
|
|
if tmn[rownum] != nil {
|
|
|
- tab1.StandKV["项目名称"] = tmn[rownum]["tag"].(string)
|
|
|
- tab1.StandKVWeight["项目名称"] = -100
|
|
|
+ tab1.StandKV["项目名称"] = append(tab1.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: tmn[rownum]["tag"].(string), Weight: -100})
|
|
|
}
|
|
|
ts = append(ts, tab1)
|
|
|
}
|
|
@@ -2092,11 +2035,20 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int)
|
|
|
}
|
|
|
} else if val, bvs := v1.(string); bvs && len(index) == 1 {
|
|
|
//删除子包的kv
|
|
|
- k1tags, _, _, _, _, _ := CommonDataAnaly(k1, "", "", val)
|
|
|
- if len(k1tags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(k1tags[0]) { //(k1tags[0].Value == "采购单位" || k1tags[0].Value == "项目编号")) {
|
|
|
- //log.Println("remove", k1, val)
|
|
|
- tn.SortKV.RemoveKey(k1)
|
|
|
- tn.assemblePackage(k1, val, index[0])
|
|
|
+ kvTags, _ := CommonDataAnaly(k1, "", "", val)
|
|
|
+ for kvTag_k, kvTag_v := range kvTags {
|
|
|
+ hasValid := false
|
|
|
+ for _, kvTag_vv := range kvTag_v {
|
|
|
+ if kvTag_vv.IsInvalid {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ hasValid = true
|
|
|
+ }
|
|
|
+ if hasValid && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(kvTag_k) {
|
|
|
+ tn.SortKV.RemoveKey(k1)
|
|
|
+ tn.assemblePackage(k1, val, index[0])
|
|
|
+ //log.Println("remove", k1, val)
|
|
|
+ }
|
|
|
}
|
|
|
//u.Debug("----==2==-------", k1)
|
|
|
}
|
|
@@ -2131,18 +2083,34 @@ func (tn *Table) isGoonNext() {
|
|
|
if bp.TableKV == nil {
|
|
|
bp.TableKV = u.NewJobKv()
|
|
|
}
|
|
|
- for k2, v2 := range mv.ColonKV.Kv {
|
|
|
- if bp.TableKV.Kv[k2] == "" {
|
|
|
- bp.TableKV.Kv[k2] = v2
|
|
|
- bp.TableKV.KvTag[k2] = mv.ColonKV.KvTag[k2]
|
|
|
- bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
|
|
|
+ for k2, v2 := range mv.ColonKV.KvTags {
|
|
|
+ for _, v2v := range v2 {
|
|
|
+ isExists := false
|
|
|
+ for _, v2vv := range bp.TableKV.KvTags[k2] {
|
|
|
+ if v2v.Value == v2vv.Value {
|
|
|
+ isExists = true
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if !isExists {
|
|
|
+ bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v)
|
|
|
+ bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
- for k2, v2 := range mv.SpaceKV.Kv {
|
|
|
- if bp.TableKV.Kv[k2] == "" {
|
|
|
- bp.TableKV.Kv[k2] = v2
|
|
|
- bp.TableKV.KvTag[k2] = mv.SpaceKV.KvTag[k2]
|
|
|
- bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
|
|
|
+ for k2, v2 := range mv.SpaceKV.KvTags {
|
|
|
+ for _, v2v := range v2 {
|
|
|
+ isExists := false
|
|
|
+ for _, v2vv := range bp.SpaceKV.KvTags[k2] {
|
|
|
+ if v2v.Value == v2vv.Value {
|
|
|
+ isExists = true
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if !isExists {
|
|
|
+ bp.SpaceKV.KvTags[k2] = append(bp.SpaceKV.KvTags[k2], v2v)
|
|
|
+ bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -2314,22 +2282,9 @@ func (tn *Table) assemblePackage(k1, v1, key string) {
|
|
|
bp.TableKV = u.NewJobKv()
|
|
|
}
|
|
|
if v1 != "" {
|
|
|
- k2, _, w1, v2, _, bf := CommonDataAnaly(k1, "中标情况", "", v1) //匹配抽取关键词
|
|
|
- if bf {
|
|
|
- for pos, k3 := range k2 {
|
|
|
- if bp.TableKV.Kv != nil && bp.TableKV.KvTag[k3] != nil && (bp.TableKV.Kv[k3] == "" || w1[pos] > bp.TableKV.KvTag[k3].Weight) {
|
|
|
- bp.TableKV.Kv[k3] = v2
|
|
|
- bp.TableKV.KvTag[k3] = &u.Tag{Value: v2, Weight: w1[pos]}
|
|
|
- } else {
|
|
|
- bp.TableKV.Kv[k1] = qutil.ObjToString(v1)
|
|
|
- //if tn.SortKV.Map[k3] == nil {
|
|
|
- // tn.SortKV.AddKey(k3, v2) //添加匹配到抽取关键词的key,value
|
|
|
- // tn.StandKVWeight[k3]=w1[pos]
|
|
|
- //}
|
|
|
- }
|
|
|
- }
|
|
|
- } else {
|
|
|
- bp.TableKV.Kv[k1] = qutil.ObjToString(v1)
|
|
|
+ kvTags, _ := CommonDataAnaly(k1, "中标情况", "", v1) //匹配抽取关键词
|
|
|
+ for k3, v3 := range kvTags {
|
|
|
+ bp.TableKV.KvTags[k3] = append(bp.TableKV.KvTags[k3], v3...)
|
|
|
}
|
|
|
}
|
|
|
k1 = regReplAllSpace.ReplaceAllString(k1, "")
|
|
@@ -2696,9 +2651,9 @@ func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactT
|
|
|
} else {
|
|
|
//
|
|
|
if !strings.HasSuffix(td_k, "方式") {
|
|
|
- _, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts, 3)
|
|
|
- if len(kTag) == 1 {
|
|
|
- tagVal, _ := u.FirstKeyValueInMap(kTag)
|
|
|
+ kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts)
|
|
|
+ if len(kvTags) == 1 {
|
|
|
+ tagVal, _ := u.FirstKeyValueInMap(kvTags)
|
|
|
if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
|
|
|
return
|
|
|
}
|
|
@@ -2719,8 +2674,6 @@ func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactT
|
|
|
}
|
|
|
}
|
|
|
td.SortKV.AddKey(myContactType+td_k, td_v)
|
|
|
- log.Println(myContactType, td_k, td_v)
|
|
|
- delete(td.SortKV.NotTagKey, td_k)
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -2738,8 +2691,8 @@ func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[s
|
|
|
if ContactInfoVagueReg.MatchString(td_k) {
|
|
|
thisTrHasMatch = tn.matchContactType(&matchMap, k, td_k, td_kv.Value, td, &weightMap, thisTrHasMatch)
|
|
|
} else if k == "采购单位" { //打标签,权重高的重新覆盖
|
|
|
- _, kTag := KvTagsToKV([]*u.Kv{td_kv}, "", []string{"采购单位"}, 3)
|
|
|
- tagVal, weightVal := u.FirstKeyValueInMap(kTag)
|
|
|
+ kvTags := GetKvTags([]*u.Kv{td_kv}, "", []string{"采购单位"})
|
|
|
+ tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
|
|
|
if tagVal == k {
|
|
|
if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 {
|
|
|
weightMap[k][k] = weightVal.(int)
|
|
@@ -2792,9 +2745,9 @@ func (tn *Table) matchContactType(matchMap *map[string]map[string]bool, k string
|
|
|
}
|
|
|
isAddToMatchMap := true
|
|
|
if !strings.HasSuffix(td_k, "方式") {
|
|
|
- _, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts, 3)
|
|
|
- if len(kTag) == 1 {
|
|
|
- tagVal, weightVal := u.FirstKeyValueInMap(kTag)
|
|
|
+ kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts)
|
|
|
+ if len(kvTags) == 1 {
|
|
|
+ tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
|
|
|
if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
|
|
|
isAddToMatchMap = false
|
|
|
}
|