|
@@ -3,7 +3,6 @@ package pretreated
|
|
|
import (
|
|
|
"fmt"
|
|
|
u "jy/util"
|
|
|
- "log"
|
|
|
qutil "qfw/util"
|
|
|
"regexp"
|
|
|
"strings"
|
|
@@ -126,10 +125,9 @@ func IsHide(g *goquery.Selection) (b bool) {
|
|
|
|
|
|
//对表格的key进行标准化处理,多个k相同时,出现覆盖问题
|
|
|
//待扩展,暂不支持正则标签库
|
|
|
-func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1, k2 []string, weight []int, v1, returntag string, b bool) {
|
|
|
- k1, k2 = []string{}, []string{}
|
|
|
- weight = []int{}
|
|
|
- tk := k
|
|
|
+func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (kvTags map[string][]*u.Tag, returntag string) {
|
|
|
+ kvTags = map[string][]*u.Tag{}
|
|
|
+ v1 := ""
|
|
|
if sv, sok := v.(string); sok { //取KV
|
|
|
v1 = sv
|
|
|
} else if sv, sok := v.([]string); sok { //是数组先默认取第一个
|
|
@@ -138,17 +136,18 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1, k2 []str
|
|
|
}
|
|
|
}
|
|
|
//对值单位的处理 (预算|费|价|额|规模|投资)
|
|
|
- if moneyreg.MatchString(tk) {
|
|
|
- v1 += GetMoneyUnit(tk, v1)
|
|
|
+ if moneyreg.MatchString(k) {
|
|
|
+ v1 += GetMoneyUnit(k, v1)
|
|
|
}
|
|
|
//先清理key
|
|
|
//u.Debug(1, k, v1)
|
|
|
- k = ClearKey(k, 2)
|
|
|
+ k1 := ClearKey(k, 2)
|
|
|
//u.Debug(2, k)
|
|
|
//取标准key
|
|
|
- res := u.GetTags(k)
|
|
|
- if len(res) == 0 && tk != k {
|
|
|
- res = u.GetTags(tk)
|
|
|
+ res := u.GetTags(k1)
|
|
|
+ if len(res) == 0 && k1 != k {
|
|
|
+ res = u.GetTags(k)
|
|
|
+ k1 = k
|
|
|
}
|
|
|
//log.Println(k, res)
|
|
|
// if len(res) == 0 {
|
|
@@ -156,50 +155,44 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1, k2 []str
|
|
|
// }
|
|
|
//当取到标准化值时,放入数组
|
|
|
if len(res) > 0 {
|
|
|
- b = true
|
|
|
for _, t1 := range res {
|
|
|
- k1 = append(k1, t1.Value)
|
|
|
- weight = append(weight, t1.Weight)
|
|
|
+ //降低冒号值的权重
|
|
|
+ if MhSpilt.MatchString(v1) {
|
|
|
+ t1.Weight -= 50
|
|
|
+ }
|
|
|
+ kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight})
|
|
|
}
|
|
|
//k1 = res[0].Value
|
|
|
- }
|
|
|
- //没有取到标准化key时,对中标金额和中标单位的逻辑处理
|
|
|
- if !b {
|
|
|
+ } else {
|
|
|
+ kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true})
|
|
|
+ //没有取到标准化key时,对中标金额和中标单位的逻辑处理
|
|
|
if filter_zbje_k.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
|
|
|
if tabletag == "" {
|
|
|
returntag = "中标情况"
|
|
|
}
|
|
|
- k1 = append(k1, "中标金额")
|
|
|
- weight = append(weight, -100)
|
|
|
- b = true
|
|
|
+ kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: "中标金额", Value: v1, Weight: -100})
|
|
|
} else if filter_zbdw_ky.MatchString(k) && !filter_zbdw_kn.MatchString(k) &&
|
|
|
filter_zbdw_v.MatchString(v1) {
|
|
|
- k1 = append(k1, "中标单位")
|
|
|
- weight = append(weight, -100)
|
|
|
+ kvTags["中标单位"] = append(kvTags["中标单位"], &u.Tag{Key: "中标单位", Value: v1, Weight: -100})
|
|
|
if tabletag == "" {
|
|
|
returntag = "中标情况"
|
|
|
}
|
|
|
- b = true
|
|
|
} else {
|
|
|
- k2 = append(k2, k)
|
|
|
- }
|
|
|
- }
|
|
|
- //对上一步没有取到标准化key的进一步处理
|
|
|
- if !b {
|
|
|
- if tabletag == "" {
|
|
|
+ //对上一步没有取到标准化key的进一步处理
|
|
|
+ if tabletag == "" {
|
|
|
|
|
|
- }
|
|
|
- if filter_tag_zb.MatchString(tabletag) || filter_tag_zb.MatchString(tabledesc) {
|
|
|
- //u.Debug(v1, k, "-----", filter_zbdw_jd.MatchString(k), filter_zbdw_v.MatchString(v1))
|
|
|
- if filter_zbje_jd.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
|
|
|
- k1 = append(k1, "中标金额")
|
|
|
- weight = append(weight, -100)
|
|
|
- b = true
|
|
|
- } /*else if filter_zbdw_jd.MatchString(k) && filter_zbdw_v.MatchString(v1) {
|
|
|
- k1 = append(k1, "中标单位")
|
|
|
- weight = append(weight, -100)
|
|
|
- b = true
|
|
|
- }*/
|
|
|
+ }
|
|
|
+ if filter_tag_zb.MatchString(tabletag) || filter_tag_zb.MatchString(tabledesc) {
|
|
|
+ //u.Debug(v1, k, "-----", filter_zbdw_jd.MatchString(k), filter_zbdw_v.MatchString(v1))
|
|
|
+ if filter_zbje_jd.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
|
|
|
+ kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: "中标金额", Value: v1, Weight: -100})
|
|
|
+
|
|
|
+ } /*else if filter_zbdw_jd.MatchString(k) && filter_zbdw_v.MatchString(v1) {
|
|
|
+ k1 = append(k1, "中标单位")
|
|
|
+ weight = append(weight, -100)
|
|
|
+ b = true
|
|
|
+ }*/
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
return
|
|
@@ -230,45 +223,14 @@ func (table *Table) KVFilter() {
|
|
|
continue
|
|
|
}
|
|
|
v := table.SortKV.Map[k]
|
|
|
- if table.SortKVWeight[k] == -99 { //td格式化kv降低权重
|
|
|
- as.AddKey(k, v)
|
|
|
- continue
|
|
|
- }
|
|
|
if _, ok := v.(string); ok { //table.SortKV.Value为字符串,匹配抽取关键词table.SortKV.Key,匹配到添加k,v到table.StandKV,table.StandKVWeight
|
|
|
k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
|
|
|
- k1, n_k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
|
|
|
+ kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
|
|
|
//qutil.Debug(k, v, k1, w1, v1, tag, b)
|
|
|
- if b {
|
|
|
- //降低冒号值的权重
|
|
|
- if MhSpilt.MatchString(v1) {
|
|
|
- for pos, _ := range k1 {
|
|
|
- w1[pos] -= 50
|
|
|
- }
|
|
|
- }
|
|
|
- if tag != "" && table.Tag == "" {
|
|
|
- table.Tag = tag
|
|
|
- }
|
|
|
- for pos, k2 := range k1 { //根据关键词,过滤table.SortKV到table.StandKV和table.StandKVWeight
|
|
|
- if table.StandKV[k2] == "" || w1[pos] > table.StandKVWeight[k2] {
|
|
|
- table.StandKV[k2] = v1 //本节点
|
|
|
- table.StandKVWeight[k2] = w1[pos]
|
|
|
- }
|
|
|
- // else if k2 == "中标金额" {
|
|
|
- // // u.Debug(qutil.Float64All(v1), qutil.Float64All(table.StandKV[k2]))
|
|
|
- // if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
|
|
|
- // table.StandKV[k2] = v1
|
|
|
- // }
|
|
|
- // }
|
|
|
- }
|
|
|
- } else {
|
|
|
- if table.StandKV[k] == "" && qutil.ObjToString(v) != "" {
|
|
|
- table.StandKV[k] = qutil.ObjToString(v)
|
|
|
- table.StandKVWeight[k] = -99
|
|
|
- }
|
|
|
- }
|
|
|
- for _, n_k2 := range n_k1 {
|
|
|
- table.SortKV.NotTagKey[n_k2] = true
|
|
|
+ if tag != "" && table.Tag == "" {
|
|
|
+ table.Tag = tag
|
|
|
}
|
|
|
+ MergeKvTags(table.StandKV, kvTags)
|
|
|
} else {
|
|
|
//u.Debug(k, v, "---------")
|
|
|
as.AddKey(k, v)
|
|
@@ -321,11 +283,10 @@ func (table *Table) KVFilter() {
|
|
|
if len(table.WinnerOrder) > 0 {
|
|
|
//中标候选人合并
|
|
|
winnerOrderEntity.Merge(table.WinnerOrder, winnerOrder)
|
|
|
- if table.StandKV["中标单位"] == "" {
|
|
|
+ if len(table.StandKV["中标单位"]) == 0 {
|
|
|
ent := table.WinnerOrder[0]["entname"]
|
|
|
if ent != nil {
|
|
|
- table.StandKV["中标单位"], _ = ent.(string)
|
|
|
- table.StandKVWeight["中标单位"] = -25
|
|
|
+ table.StandKV["中标单位"] = append(table.StandKV["中标单位"], &u.Tag{Key: "中标单位", Value: qutil.ObjToString(ent), Weight: -25})
|
|
|
}
|
|
|
}
|
|
|
} else if !table.BPackage { //没有table.WinnerOrder也没有分包 将td中的WinnerOrder赋值给table.WinnerOrder
|
|
@@ -353,7 +314,7 @@ func (table *Table) KVFilter() {
|
|
|
func (table *Table) sortKVArr(as *SortMap, winnertag bool) {
|
|
|
checkKey := map[int]bool{}
|
|
|
for kn, k := range as.Keys { //遍历table.SortKV.value为数组的key
|
|
|
- if strings.TrimSpace(table.StandKV[k]) != "" {
|
|
|
+ if len(table.StandKV[k]) == 0 || strings.TrimSpace(table.StandKV[k][0].Value) != "" {
|
|
|
continue
|
|
|
}
|
|
|
v := as.Map[k]
|
|
@@ -488,26 +449,17 @@ func (table *Table) sortKVArr(as *SortMap, winnertag bool) {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- k1, n_k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v)
|
|
|
- if b {
|
|
|
- if tag != "" && table.Tag == "" {
|
|
|
- table.Tag = tag
|
|
|
- }
|
|
|
- for pos, k2 := range k1 {
|
|
|
- if table.StandKV[k2] == "" || w1[pos] > table.StandKVWeight[k2] {
|
|
|
- table.StandKV[k2] = v1 //本节点
|
|
|
- table.StandKVWeight[k2] = w1[pos]
|
|
|
- }
|
|
|
- // else if k2 == "中标金额" {
|
|
|
- // if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
|
|
|
- // table.StandKV[k2] = v1
|
|
|
- // }
|
|
|
- // }
|
|
|
- }
|
|
|
- } else {
|
|
|
- for _, n_k2 := range n_k1 {
|
|
|
- table.SortKV.NotTagKey[n_k2] = true
|
|
|
- }
|
|
|
+ kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v)
|
|
|
+ if tag != "" && table.Tag == "" {
|
|
|
+ table.Tag = tag
|
|
|
+ }
|
|
|
+ for kk, vv := range kvTags {
|
|
|
+ table.StandKV[kk] = append(table.StandKV[kk], vv...)
|
|
|
+ // else if k2 == "中标金额" {
|
|
|
+ // if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
|
|
|
+ // table.StandKV[k2] = v1
|
|
|
+ // }
|
|
|
+ // }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -547,11 +499,8 @@ func (table *Table) analyTdKV() {
|
|
|
//u.Debug(td.BH, td.Val, td.SonTableResult)
|
|
|
if td.SonTableResult != nil {
|
|
|
//u.Debug(td.SonTableResult.SortKV.Map, "-------", td.SonTableResult.Tabs)
|
|
|
- for _, k3 := range td.SonTableResult.SortKV.Keys {
|
|
|
- if table.StandKV[k3] == "" || td.SonTableResult.SortKVWeight[k3] > table.StandKVWeight[k3] {
|
|
|
- table.StandKV[k3] = qutil.ObjToString(td.SonTableResult.SortKV.Map[k3])
|
|
|
- table.StandKVWeight[k3] = td.SonTableResult.SortKVWeight[k3]
|
|
|
- }
|
|
|
+ for k3, v3 := range td.SonTableResult.KvTags {
|
|
|
+ table.StandKV[k3] = append(table.StandKV[k3], v3...)
|
|
|
}
|
|
|
//中标候选人排序
|
|
|
if table.WinnerOrder == nil || len(table.WinnerOrder) == 0 {
|
|
@@ -579,15 +528,23 @@ func (table *Table) MergerToTableresult() {
|
|
|
bp.TableKV = u.NewJobKv()
|
|
|
}
|
|
|
v1 := v.(*u.BlockPackage)
|
|
|
- if v1.TableKV != nil && v1.TableKV.Kv != nil {
|
|
|
- for k2, v2 := range v1.TableKV.Kv {
|
|
|
+ if v1.TableKV != nil && len(v1.TableKV.KvTags) > 0 {
|
|
|
+ for k2, v2 := range v1.TableKV.KvTags {
|
|
|
if bp.TableKV == nil {
|
|
|
bp.TableKV = u.NewJobKv()
|
|
|
}
|
|
|
- if bp.TableKV.Kv[k2] == "" || (v1.TableKV.KvTag[k2] != nil && bp.TableKV.KvTag[k2] != nil && v1.TableKV.KvTag[k2].Weight > bp.TableKV.KvTag[k2].Weight) {
|
|
|
- //可能会报错 assignment to entry in nil map
|
|
|
- bp.TableKV.Kv[k2] = v2
|
|
|
- bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
|
|
|
+ isExists := false
|
|
|
+ for _, v2v := range v2 {
|
|
|
+ for _, v2vv := range bp.TableKV.KvTags[k2] {
|
|
|
+ if v2v.Value == v2vv.Value {
|
|
|
+ isExists = true
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if !isExists {
|
|
|
+ bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v)
|
|
|
+ bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -603,25 +560,12 @@ func (table *Table) MergerToTableresult() {
|
|
|
// u.Debug(table, table.TableResult, str)
|
|
|
}
|
|
|
//遍历标准key到tableresult.sortkv中
|
|
|
- for k, v := range table.StandKV {
|
|
|
- if table.TableResult.SortKV.Map[k] == nil || table.StandKVWeight[k] > table.TableResult.SortKVWeight[k] || strings.Contains(table.Tag, "变更") {
|
|
|
- v = strings.Replace(v, "__", "", -1)
|
|
|
- if table.TableResult.SortKV.Map[k] == nil {
|
|
|
- table.TableResult.SortKV.AddKey(k, v) //父集
|
|
|
- } else {
|
|
|
- if k == "项目编号" { //项目编号存在,又匹配到全为中文跳过
|
|
|
-
|
|
|
- if regHz.MatchString(v) {
|
|
|
- continue
|
|
|
- }
|
|
|
- }
|
|
|
- table.TableResult.SortKV.ReplaceKey(k, v, k)
|
|
|
- }
|
|
|
- table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
|
|
|
- } else if table.TableResult.SortKV.Map[k] != nil {
|
|
|
- //u.Debug(k, v, table.TableResult.SortKV.Map[k], "..............")
|
|
|
+ for _, v := range table.StandKV {
|
|
|
+ for _, vv := range v {
|
|
|
+ vv.Value = strings.Replace(vv.Value, "__", "", -1)
|
|
|
}
|
|
|
}
|
|
|
+ MergeKvTags(table.TableResult.KvTags, table.StandKV)
|
|
|
//表格的块标签
|
|
|
if table.TableResult.BlockTag == "" && table.Tag != "" {
|
|
|
table.TableResult.BlockTag = table.Tag
|
|
@@ -690,10 +634,13 @@ func (ts *TableResult) Analy() {
|
|
|
pns := map[string]string{}
|
|
|
pnarr := []string{}
|
|
|
for _, table := range tabs {
|
|
|
- pn := table.StandKV["项目名称"]
|
|
|
- if pn != "" && TitleReg.MatchString(pn) {
|
|
|
- pnarr = append(pnarr, pn)
|
|
|
- matchres := TitleReg.FindAllStringSubmatch(pn, -1)
|
|
|
+ if len(table.StandKV["项目名称"]) == 0 {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ pn := table.StandKV["项目名称"][0]
|
|
|
+ if pn != nil && pn.Value != "" && TitleReg.MatchString(pn.Value) {
|
|
|
+ pnarr = append(pnarr, pn.Value)
|
|
|
+ matchres := TitleReg.FindAllStringSubmatch(pn.Value, -1)
|
|
|
if len(matchres) == 1 && len(matchres[0]) > 0 {
|
|
|
v1 := u.PackageNumberConvert(matchres[0][0])
|
|
|
pns[v1] = matchres[0][0]
|
|
@@ -702,7 +649,9 @@ func (ts *TableResult) Analy() {
|
|
|
bp.Origin = matchres[0][0]
|
|
|
bp.TableKV = u.NewJobKv()
|
|
|
for _, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
|
|
|
- bp.TableKV.Kv[k] = table.StandKV[k]
|
|
|
+ if len(table.StandKV[k]) > 0 {
|
|
|
+ bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value})
|
|
|
+ }
|
|
|
}
|
|
|
bp.WinnerOrder = table.WinnerOrder
|
|
|
if table.BlockPackage.Map[v1] == nil {
|
|
@@ -727,8 +676,7 @@ func (ts *TableResult) Analy() {
|
|
|
}
|
|
|
}
|
|
|
if btrue {
|
|
|
- ts.SortKV.AddKey("项目名称", pname)
|
|
|
- ts.SortKVWeight["项目名称"] = 100
|
|
|
+ ts.KvTags["项目名称"] = append(ts.KvTags["项目名称"], &u.Tag{Key: "项目名称", Value: pname, Weight: 100})
|
|
|
for _, table := range tabs {
|
|
|
table.BPackage = true
|
|
|
//预算、中标金额、NullTxtBid成交供应商排名 中标单位 成交状态
|
|
@@ -742,7 +690,9 @@ func (ts *TableResult) Analy() {
|
|
|
bp.TableKV = u.NewJobKv()
|
|
|
}
|
|
|
for nk, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
|
|
|
- bp.TableKV.Kv[k] = table.StandKV[k]
|
|
|
+ if len(table.StandKV[k]) > 0 {
|
|
|
+ bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value})
|
|
|
+ }
|
|
|
if nk < 4 {
|
|
|
delete(table.StandKV, k)
|
|
|
}
|
|
@@ -832,8 +782,7 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
|
|
|
if table.Tag != "" {
|
|
|
_, _, b := CheckMultiPackage(table.Tag, "")
|
|
|
if b {
|
|
|
- table.StandKV["项目名称"] = table.Tag
|
|
|
- table.StandKVWeight["项目名称"] = -100
|
|
|
+ table.StandKV["项目名称"] = append(table.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: table.Tag, Weight: -100})
|
|
|
}
|
|
|
}
|
|
|
table.TdContactFormat(contactFormat) //contactFormat,处理采购单位,代理机构
|
|
@@ -845,35 +794,12 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
|
|
|
}
|
|
|
//判断是否是多包,并处理分包的//遍历td分块
|
|
|
table.CheckMultiPackageByTable()
|
|
|
- //str := "\n"
|
|
|
- //for k, v := range table.StandKV {
|
|
|
- // str += fmt.Sprintf("_==___%s:%v\n", k, v)
|
|
|
- // if table.TableResult.SortKV.Map[k] == nil {
|
|
|
- // table.TableResult.SortKV.AddKey(k, v)
|
|
|
- // table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
|
|
|
- // }
|
|
|
- //}
|
|
|
res, _, _, _, _ := CheckCommon(table.Tag, "abandontable")
|
|
|
if !res {
|
|
|
//过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
|
|
|
table.KVFilter()
|
|
|
}
|
|
|
- if len(table.StandKV) == 0{
|
|
|
- for k,v := range table.SortKV.Map{
|
|
|
- table.StandKV[k] = fmt.Sprint(v)
|
|
|
- table.StandKVWeight[k] = -999
|
|
|
- }
|
|
|
- }
|
|
|
- for k, v := range table.StandKV { //过滤后的标准化kv
|
|
|
- if table.TableResult.SortKV.Map[k] == nil || table.StandKVWeight[k] > table.TableResult.SortKVWeight[k] {
|
|
|
- table.TableResult.SortKV.AddKey(k, v)
|
|
|
- table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
|
|
|
- }
|
|
|
- }
|
|
|
- for k, v := range table.SortKV.NotTagKey {
|
|
|
- table.TableResult.SortKV.NotTagKey[k] = v
|
|
|
- }
|
|
|
- //u.Debug(str)
|
|
|
+ //MergeKvTags(table.TableResult.KvTags, table.StandKV)
|
|
|
}
|
|
|
}
|
|
|
return ts
|
|
@@ -917,8 +843,7 @@ func (table *Table) tableSubDemolitionTable() []*Table {
|
|
|
tab1 = NewTable("", table.TableResult, table.Goquery)
|
|
|
tab1.BSplit = true
|
|
|
if tmn[rownum] != nil {
|
|
|
- tab1.StandKV["项目名称"] = tmn[rownum]["tag"].(string)
|
|
|
- tab1.StandKVWeight["项目名称"] = -100
|
|
|
+ tab1.StandKV["项目名称"] = append(tab1.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: tmn[rownum]["tag"].(string), Weight: -100})
|
|
|
}
|
|
|
ts = append(ts, tab1)
|
|
|
}
|
|
@@ -1494,7 +1419,6 @@ func (table *Table) FindKV() {
|
|
|
continue
|
|
|
}
|
|
|
table.SortKV.AddKey(tdk, tdv)
|
|
|
- table.SortKVWeight[tdk] = -99
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -2103,16 +2027,27 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int)
|
|
|
//}
|
|
|
for _, vcgdw := range k1tags {
|
|
|
if vcgdw.Value == "采购单位" {
|
|
|
- tn.SortKV.RemoveKey(k1)
|
|
|
}
|
|
|
}
|
|
|
} else if val, bvs := v1.(string); bvs && len(index) == 1 {
|
|
|
//删除子包的kv
|
|
|
- k1tags, _, _, _, _, _ := CommonDataAnaly(k1, "", "", val)
|
|
|
- if !(len(k1tags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(k1tags[0])) {
|
|
|
- //log.Println("remove", k1, val)
|
|
|
- tn.SortKV.RemoveKey(k1)
|
|
|
- tn.assemblePackage(k1, val, index[0])
|
|
|
+ kvTags, _ := CommonDataAnaly(k1, "", "", val)
|
|
|
+ for kvTag_k, kvTag_v := range kvTags {
|
|
|
+ hasValid := false
|
|
|
+ for _, kvTag_vv := range kvTag_v {
|
|
|
+ if kvTag_vv.IsInvalid {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ hasValid = true
|
|
|
+ }
|
|
|
+ if !hasValid {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ if !(len(kvTags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(kvTag_k)) {
|
|
|
+ tn.SortKV.RemoveKey(k1)
|
|
|
+ tn.assemblePackage(k1, val, index[0])
|
|
|
+ //log.Println("remove", k1, val)
|
|
|
+ }
|
|
|
}
|
|
|
//u.Debug("----==2==-------", k1)
|
|
|
}
|
|
@@ -2147,18 +2082,34 @@ func (tn *Table) isGoonNext() {
|
|
|
if bp.TableKV == nil {
|
|
|
bp.TableKV = u.NewJobKv()
|
|
|
}
|
|
|
- for k2, v2 := range mv.ColonKV.Kv {
|
|
|
- if bp.TableKV.Kv[k2] == "" {
|
|
|
- bp.TableKV.Kv[k2] = v2
|
|
|
- bp.TableKV.KvTag[k2] = mv.ColonKV.KvTag[k2]
|
|
|
- bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
|
|
|
+ for k2, v2 := range mv.ColonKV.KvTags {
|
|
|
+ for _, v2v := range v2 {
|
|
|
+ isExists := false
|
|
|
+ for _, v2vv := range bp.TableKV.KvTags[k2] {
|
|
|
+ if v2v.Value == v2vv.Value {
|
|
|
+ isExists = true
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if !isExists {
|
|
|
+ bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v)
|
|
|
+ bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
- for k2, v2 := range mv.SpaceKV.Kv {
|
|
|
- if bp.TableKV.Kv[k2] == "" {
|
|
|
- bp.TableKV.Kv[k2] = v2
|
|
|
- bp.TableKV.KvTag[k2] = mv.SpaceKV.KvTag[k2]
|
|
|
- bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
|
|
|
+ for k2, v2 := range mv.SpaceKV.KvTags {
|
|
|
+ for _, v2v := range v2 {
|
|
|
+ isExists := false
|
|
|
+ for _, v2vv := range bp.SpaceKV.KvTags[k2] {
|
|
|
+ if v2v.Value == v2vv.Value {
|
|
|
+ isExists = true
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if !isExists {
|
|
|
+ bp.SpaceKV.KvTags[k2] = append(bp.SpaceKV.KvTags[k2], v2v)
|
|
|
+ bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -2330,22 +2281,9 @@ func (tn *Table) assemblePackage(k1, v1, key string) {
|
|
|
bp.TableKV = u.NewJobKv()
|
|
|
}
|
|
|
if v1 != "" {
|
|
|
- k2, _, w1, v2, _, bf := CommonDataAnaly(k1, "中标情况", "", v1) //匹配抽取关键词
|
|
|
- if bf {
|
|
|
- for pos, k3 := range k2 {
|
|
|
- if bp.TableKV.Kv != nil && bp.TableKV.KvTag[k3] != nil && (bp.TableKV.Kv[k3] == "" || w1[pos] > bp.TableKV.KvTag[k3].Weight) {
|
|
|
- bp.TableKV.Kv[k3] = v2
|
|
|
- bp.TableKV.KvTag[k3] = &u.Tag{Value: v2, Weight: w1[pos]}
|
|
|
- } else {
|
|
|
- bp.TableKV.Kv[k1] = qutil.ObjToString(v1)
|
|
|
- //if tn.SortKV.Map[k3] == nil {
|
|
|
- // tn.SortKV.AddKey(k3, v2) //添加匹配到抽取关键词的key,value
|
|
|
- // tn.StandKVWeight[k3]=w1[pos]
|
|
|
- //}
|
|
|
- }
|
|
|
- }
|
|
|
- } else {
|
|
|
- bp.TableKV.Kv[k1] = qutil.ObjToString(v1)
|
|
|
+ kvTags, _ := CommonDataAnaly(k1, "中标情况", "", v1) //匹配抽取关键词
|
|
|
+ for k3, v3 := range kvTags {
|
|
|
+ bp.TableKV.KvTags[k3] = append(bp.TableKV.KvTags[k3], v3...)
|
|
|
}
|
|
|
}
|
|
|
k1 = regReplAllSpace.ReplaceAllString(k1, "")
|
|
@@ -2712,9 +2650,9 @@ func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactT
|
|
|
} else {
|
|
|
//
|
|
|
if !strings.HasSuffix(td_k, "方式") {
|
|
|
- _, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts, 3)
|
|
|
- if len(kTag) == 1 {
|
|
|
- tagVal, _ := u.FirstKeyValueInMap(kTag)
|
|
|
+ kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts)
|
|
|
+ if len(kvTags) == 1 {
|
|
|
+ tagVal, _ := u.FirstKeyValueInMap(kvTags)
|
|
|
if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
|
|
|
return
|
|
|
}
|
|
@@ -2735,8 +2673,6 @@ func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactT
|
|
|
}
|
|
|
}
|
|
|
td.SortKV.AddKey(myContactType+td_k, td_v)
|
|
|
- log.Println(myContactType, td_k, td_v)
|
|
|
- delete(td.SortKV.NotTagKey, td_k)
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -2754,8 +2690,8 @@ func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[s
|
|
|
if ContactInfoVagueReg.MatchString(td_k) {
|
|
|
thisTrHasMatch = tn.matchContactType(&matchMap, k, td_k, td_kv.Value, td, &weightMap, thisTrHasMatch)
|
|
|
} else if k == "采购单位" { //打标签,权重高的重新覆盖
|
|
|
- _, kTag := KvTagsToKV([]*u.Kv{td_kv}, "", []string{"采购单位"}, 3)
|
|
|
- tagVal, weightVal := u.FirstKeyValueInMap(kTag)
|
|
|
+ kvTags := GetKvTags([]*u.Kv{td_kv}, "", []string{"采购单位"})
|
|
|
+ tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
|
|
|
if tagVal == k {
|
|
|
if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 {
|
|
|
weightMap[k][k] = weightVal.(int)
|
|
@@ -2808,9 +2744,9 @@ func (tn *Table) matchContactType(matchMap *map[string]map[string]bool, k string
|
|
|
}
|
|
|
isAddToMatchMap := true
|
|
|
if !strings.HasSuffix(td_k, "方式") {
|
|
|
- _, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts, 3)
|
|
|
- if len(kTag) == 1 {
|
|
|
- tagVal, weightVal := u.FirstKeyValueInMap(kTag)
|
|
|
+ kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts)
|
|
|
+ if len(kvTags) == 1 {
|
|
|
+ tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
|
|
|
if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
|
|
|
isAddToMatchMap = false
|
|
|
}
|
|
@@ -3201,7 +3137,7 @@ func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMa
|
|
|
for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
|
|
|
val := table.SortKV.Map[key]
|
|
|
key = regReplAllSpace.ReplaceAllString(key, "")
|
|
|
- key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
|
|
|
+ key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
|
|
|
if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
|
|
|
/*
|
|
|
{
|