|
@@ -122,7 +122,7 @@ func IsHide(g *goquery.Selection) (b bool) {
|
|
|
|
|
|
//对表格的key进行标准化处理,多个k相同时,出现覆盖问题
|
|
|
//待扩展,暂不支持正则标签库
|
|
|
-func CommonDataAnaly(k, tabletag, tabledesc string, v interface{},isSite bool) (kvTags map[string][]*u.Tag, returntag string) {
|
|
|
+func CommonDataAnaly(k, tabletag, tabledesc string, v interface{},isSite bool,codeSite string) (kvTags map[string][]*u.Tag, returntag string) {
|
|
|
kvTags = map[string][]*u.Tag{}
|
|
|
v1 := ""
|
|
|
if sv, sok := v.(string); sok { //取KV
|
|
@@ -141,9 +141,9 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{},isSite bool) (
|
|
|
k1 := ClearKey(k, 2)
|
|
|
//u.Debug(2, k)
|
|
|
//取标准key
|
|
|
- res := u.GetTags(k1,isSite)
|
|
|
+ res := u.GetTags(k1,isSite,codeSite)
|
|
|
if len(res) == 0 && k1 != k {
|
|
|
- res = u.GetTags(k,isSite)
|
|
|
+ res = u.GetTags(k,isSite,codeSite)
|
|
|
k1 = k
|
|
|
}
|
|
|
//log.Println(k, res)
|
|
@@ -202,7 +202,7 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{},isSite bool) (
|
|
|
}
|
|
|
|
|
|
//对解析后的表格的kv进行过滤
|
|
|
-func (table *Table) KVFilter(isSite bool) {
|
|
|
+func (table *Table) KVFilter(isSite bool,codeSite string) {
|
|
|
//1.标准化值查找
|
|
|
//2.对数组的处理
|
|
|
//3.对分包的处理
|
|
@@ -230,7 +230,7 @@ func (table *Table) KVFilter(isSite bool) {
|
|
|
if k == `中标价(万元)\费率(%)`{
|
|
|
k = "中标价(万元)"
|
|
|
}
|
|
|
- kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v,isSite) //对key标准化处理,没有找到会走中标
|
|
|
+ kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v,isSite,codeSite) //对key标准化处理,没有找到会走中标
|
|
|
//qutil.Debug(k, v, k1, w1, v1, tag, b)
|
|
|
if tag != "" && table.Tag == "" {
|
|
|
table.Tag = tag
|
|
@@ -242,7 +242,7 @@ func (table *Table) KVFilter(isSite bool) {
|
|
|
}
|
|
|
}
|
|
|
//处理值是数组的kv放入标准化kv中//处理table.SortKV.value为数组的情况
|
|
|
- table.sortKVArr(as,isSite)
|
|
|
+ table.sortKVArr(as,isSite,codeSite)
|
|
|
//
|
|
|
if len(table.WinnerOrder) > 0 || !table.BPackage {
|
|
|
winnerOrder := []map[string]interface{}{}
|
|
@@ -274,7 +274,7 @@ func (table *Table) KVFilter(isSite bool) {
|
|
|
L: //遍历每个td,查询中标人
|
|
|
for _, tr := range table.TRs {
|
|
|
for _, td := range tr.TDs {
|
|
|
- winnerOrder = winnerOrderEntity.Find(td.Val, true, 3,isSite)
|
|
|
+ winnerOrder = winnerOrderEntity.Find(td.Val, true, 3,isSite,codeSite)
|
|
|
if len(winnerOrder) > 0 {
|
|
|
break L
|
|
|
}
|
|
@@ -305,7 +305,7 @@ func (table *Table) KVFilter(isSite bool) {
|
|
|
}
|
|
|
|
|
|
//处理table.SortKV.value为数组的情况
|
|
|
-func (table *Table) sortKVArr(as *SortMap,isSite bool) {
|
|
|
+func (table *Table) sortKVArr(as *SortMap,isSite bool,codeSite string) {
|
|
|
winnertag := iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.Tag) //table标签
|
|
|
if !winnertag {
|
|
|
winnertag = iswinnertabletag.MatchString(table.TableResult.BlockTag) && !nswinnertabletag.MatchString(table.TableResult.BlockTag) //块标签
|
|
@@ -340,7 +340,7 @@ func (table *Table) sortKVArr(as *SortMap,isSite bool) {
|
|
|
res, _, _, _, repl := CheckCommon(k, "bidorder")
|
|
|
kv := ""
|
|
|
if !res {
|
|
|
- kt := u.GetTags(filterThText.ReplaceAllString(ClearKey(k, 2), ""),isSite)
|
|
|
+ kt := u.GetTags(filterThText.ReplaceAllString(ClearKey(k, 2), ""),isSite,codeSite)
|
|
|
if kt.Len() > 0 {
|
|
|
kv = kt[0].Value
|
|
|
}
|
|
@@ -444,7 +444,7 @@ func (table *Table) sortKVArr(as *SortMap,isSite bool) {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v,isSite)
|
|
|
+ kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v,isSite,codeSite)
|
|
|
if tag != "" && table.Tag == "" {
|
|
|
table.Tag = tag
|
|
|
}
|
|
@@ -612,7 +612,7 @@ func (table *Table) MergerToTableresult() {
|
|
|
解析表格入口
|
|
|
返回:汇总表格对象
|
|
|
**/
|
|
|
-func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock,isSite bool) (tabres *TableResult) {
|
|
|
+func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock,isSite bool,codeSite string) (tabres *TableResult) {
|
|
|
defer qutil.Catch()
|
|
|
//u.Debug(con)
|
|
|
if itype == 1 {
|
|
@@ -630,12 +630,12 @@ func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype
|
|
|
tabres.GoqueryTabs = tabs
|
|
|
//}
|
|
|
//解析表格集
|
|
|
- tabres.Analy(isSite)
|
|
|
+ tabres.Analy(isSite,codeSite)
|
|
|
return
|
|
|
}
|
|
|
|
|
|
//开始解析表格集
|
|
|
-func (ts *TableResult) Analy(isSite bool) {
|
|
|
+func (ts *TableResult) Analy(isSite bool,codeSite string) {
|
|
|
tabs := []*Table{}
|
|
|
contactFormat := &u.ContactFormat{
|
|
|
IndexMap: map[int]string{},
|
|
@@ -644,7 +644,7 @@ func (ts *TableResult) Analy(isSite bool) {
|
|
|
//for _, table := range ts.GoqueryTabs {
|
|
|
tn := NewTable(ts.Html, ts, ts.GoqueryTabs)
|
|
|
//核心模块
|
|
|
- tsw := tn.Analy(contactFormat,isSite)
|
|
|
+ tsw := tn.Analy(contactFormat,isSite,codeSite)
|
|
|
for _, tab := range tsw {
|
|
|
if len(tab.TRs) > 0 {
|
|
|
tabs = append(tabs, tab)
|
|
@@ -742,23 +742,23 @@ func (ts *TableResult) Analy(isSite bool) {
|
|
|
}
|
|
|
|
|
|
// Analy parses this table: it selects the table's <tr> rows, builds the table structure with createTabe, recomputes row/column spans, and returns the tables produced by AnalyTables.
// isSite and codeSite are not inspected here; they are forwarded unchanged to createTabe and AnalyTables.
|
|
|
-func (table *Table) Analy(contactFormat *u.ContactFormat,isSite bool) []*Table {
|
|
|
+func (table *Table) Analy(contactFormat *u.ContactFormat,isSite bool,codeSite string) []*Table {
|
|
|
// Look for <tr> rows nested under tbody/thead/tfoot first; if none are found, fall back to <tr> elements that are direct children of the table.
|
|
|
trs := table.Goquery.ChildrenFiltered("tbody,thead,tfoot").ChildrenFiltered("tr")
|
|
|
if trs.Size() == 0 {
|
|
|
trs = table.Goquery.ChildrenFiltered("tr")
|
|
|
}
|
|
|
// Walk the row nodes and initialise the table structure.
|
|
|
- table.createTabe(trs,isSite)
|
|
|
+ table.createTabe(trs,isSite,codeSite)
|
|
|
// Reset the row/column layout.
|
|
|
table.ComputeRowColSpan()
|
|
|
// Run the overall analysis pass over the table struct.
|
|
|
- ts := table.AnalyTables(contactFormat,isSite)
|
|
|
+ ts := table.AnalyTables(contactFormat,isSite,codeSite)
|
|
|
return ts
|
|
|
}
|
|
|
|
|
|
//遍历节点,初始化table 结构体
|
|
|
-func (table *Table) createTabe(trs *goquery.Selection,isSite bool) {
|
|
|
+func (table *Table) createTabe(trs *goquery.Selection,isSite bool,codeSite string) {
|
|
|
trs.Each(func(n int, sel *goquery.Selection) {
|
|
|
//隐藏行不处理
|
|
|
if IsHide(sel) {
|
|
@@ -775,7 +775,7 @@ func (table *Table) createTabe(trs *goquery.Selection,isSite bool) {
|
|
|
return
|
|
|
}
|
|
|
//进入每一个单元格
|
|
|
- td := NewTD(selm, TR, table,isSite) //初始化td,kv处理,td中有table处理,td的方向
|
|
|
+ td := NewTD(selm, TR, table,isSite,codeSite) //初始化td,kv处理,td中有table处理,td的方向
|
|
|
//num++
|
|
|
TR.AddTD(td)
|
|
|
if td.Val == "" && td.SonTableResult == nil && len(td.SortKV.Map) == 0 { //删除一个tr,tr中所有td是空值的
|
|
@@ -793,7 +793,7 @@ func (table *Table) createTabe(trs *goquery.Selection,isSite bool) {
|
|
|
}
|
|
|
|
|
|
//对table进行整体解析处理
|
|
|
-func (tn *Table) AnalyTables(contactFormat *u.ContactFormat,isSite bool) []*Table {
|
|
|
+func (tn *Table) AnalyTables(contactFormat *u.ContactFormat,isSite bool,codeSite string) []*Table {
|
|
|
ts := tn.tableSubDemolitionTable() //分包,拆表
|
|
|
for n, table := range ts {
|
|
|
//处理每个table
|
|
@@ -802,15 +802,15 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat,isSite bool) []*Tabl
|
|
|
table.deleteTrimTr()
|
|
|
//table.Print()
|
|
|
//校对表格
|
|
|
- table.Adjust(isSite)
|
|
|
+ table.Adjust(isSite,codeSite)
|
|
|
//查找表格的标签,table.Tag字段
|
|
|
table.FindTag()
|
|
|
//log.Println(table.TableResult.Id, table.Html)
|
|
|
//分割表格
|
|
|
- table.bSplit(n, ts,isSite)
|
|
|
- table.TdContactFormat(contactFormat,isSite) //contactFormat,处理采购单位,代理机构
|
|
|
+ table.bSplit(n, ts,isSite,codeSite)
|
|
|
+ table.TdContactFormat(contactFormat,isSite,codeSite) //contactFormat,处理采购单位,代理机构
|
|
|
//开始查找kv,核心模块,table.SortKV
|
|
|
- table.FindKV(isSite)
|
|
|
+ table.FindKV(isSite,codeSite)
|
|
|
//table中抽取品牌,table.BrandData
|
|
|
if u.IsBrandGoods {
|
|
|
table.analyBrand()
|
|
@@ -818,7 +818,7 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat,isSite bool) []*Tabl
|
|
|
res, _, _, _, _ := CheckCommon(table.Tag, "abandontable")
|
|
|
if !res {
|
|
|
//过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
|
|
|
- table.KVFilter(isSite)
|
|
|
+ table.KVFilter(isSite,codeSite)
|
|
|
}
|
|
|
//对没有表头表格的处理
|
|
|
if table.Tag != "" {
|
|
@@ -848,7 +848,7 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat,isSite bool) []*Tabl
|
|
|
}
|
|
|
}
|
|
|
//判断是否是多包,并处理分包的//遍历td分块
|
|
|
- table.CheckMultiPackageByTable(isSite)
|
|
|
+ table.CheckMultiPackageByTable(isSite,codeSite)
|
|
|
//MergeKvTags(table.TableResult.KvTags, table.StandKV)
|
|
|
}
|
|
|
}
|
|
@@ -913,7 +913,7 @@ func (table *Table) tableSubDemolitionTable() []*Table {
|
|
|
}
|
|
|
|
|
|
//分割表格
|
|
|
-func (table *Table) bSplit(n int, ts []*Table,isSite bool) {
|
|
|
+func (table *Table) bSplit(n int, ts []*Table,isSite bool,codeSite string) {
|
|
|
if table.BSplit {
|
|
|
if !table.BHeader && n > 0 {
|
|
|
for i := n - 1; i > -1; i-- {
|
|
@@ -921,7 +921,7 @@ func (table *Table) bSplit(n int, ts []*Table,isSite bool) {
|
|
|
if ts[i].BFirstRow {
|
|
|
//取第一行插入到
|
|
|
table.InsertTR(ts[i].TRs[0])
|
|
|
- table.Adjust(isSite)
|
|
|
+ table.Adjust(isSite,codeSite)
|
|
|
}
|
|
|
break
|
|
|
}
|
|
@@ -952,7 +952,7 @@ func (table *Table) deleteTrimTr() {
|
|
|
}
|
|
|
|
|
|
//校对表格
|
|
|
-func (table *Table) Adjust(isSite bool) {
|
|
|
+func (table *Table) Adjust(isSite bool,codeSite string) {
|
|
|
//计算行列起止位置,跨行跨列处理
|
|
|
table.ComputeRowColSpan()
|
|
|
// for k1, tr := range table.TRs {
|
|
@@ -987,7 +987,7 @@ func (table *Table) Adjust(isSite bool) {
|
|
|
}
|
|
|
if float32(count)/float32(table.TDNum) < 0.85 {
|
|
|
//精确计算起止行列是表头的概率
|
|
|
- table.ComputeRowColIsKeyRation(isSite)
|
|
|
+ table.ComputeRowColIsKeyRation(isSite,codeSite)
|
|
|
bhead := false
|
|
|
L:
|
|
|
for i, tr := range table.TRs {
|
|
@@ -999,7 +999,7 @@ func (table *Table) Adjust(isSite bool) {
|
|
|
if res {
|
|
|
//删除此行
|
|
|
table.TRs = table.TRs[:len(table.TRs)-1]
|
|
|
- table.Adjust(isSite)
|
|
|
+ table.Adjust(isSite,codeSite)
|
|
|
return
|
|
|
}
|
|
|
}
|
|
@@ -1118,7 +1118,7 @@ func (table *Table) GetKeyRation() {
|
|
|
}
|
|
|
|
|
|
//计算行列是表头的概率调用GetKeyRation
|
|
|
-func (table *Table) ComputeRowColIsKeyRation(isSite bool) {
|
|
|
+func (table *Table) ComputeRowColIsKeyRation(isSite bool,codeSite string) {
|
|
|
//增加对跨行校正限止
|
|
|
// u.Debug(table.Brule, table.ColNum, table.RowNum, table.TDNum)
|
|
|
bkeyfirstrow := false
|
|
@@ -1357,7 +1357,7 @@ func (table *Table) ComputeRowColIsKeyRation(isSite bool) {
|
|
|
tr.TDs[0].BH = false
|
|
|
tr.TDs[0].KVDirect = 0
|
|
|
sv := FindKv(tr.TDs[0].Val, "", 2)
|
|
|
- _, resm := colonkvEntity.entrance(tr.TDs[0].Val, "", nil, 2,isSite)
|
|
|
+ _, resm := colonkvEntity.entrance(tr.TDs[0].Val, "", nil, 2,isSite,codeSite)
|
|
|
for k, v := range resm {
|
|
|
sv.AddKey(k, v)
|
|
|
}
|
|
@@ -1392,7 +1392,7 @@ func (table *Table) ComputeRowColIsKeyRation(isSite bool) {
|
|
|
}
|
|
|
|
|
|
//查找表格的kv,调用FindTdVal
|
|
|
-func (table *Table) FindKV(isSite bool) {
|
|
|
+func (table *Table) FindKV(isSite bool,codeSite string) {
|
|
|
//判断全是key的表格不再查找
|
|
|
if table.BHeader { //只要一个是key即为true
|
|
|
direct := If(table.BFirstRow, 2, 1).(int) //kv,2查找方向,向上查找
|
|
@@ -1468,7 +1468,7 @@ func (table *Table) FindKV(isSite bool) {
|
|
|
for n, r := range r1 {
|
|
|
if len([]rune(r)) < 60 { // 长度小于60才去分
|
|
|
//res1, _ := GetKVAll(r, "", nil)
|
|
|
- res1, _ := colonkvEntity.entrance(r, "", nil, 2,isSite)
|
|
|
+ res1, _ := colonkvEntity.entrance(r, "", nil, 2,isSite,codeSite)
|
|
|
if res1 != nil {
|
|
|
nmap[n] = res1
|
|
|
nmapkeys = append(nmapkeys, n)
|
|
@@ -1900,7 +1900,7 @@ func (tn *Table) GetTdByRCNo(row, col int) *TD {
|
|
|
}
|
|
|
|
|
|
//判断表格是否是分包
|
|
|
-func (tn *Table) CheckMultiPackageByTable(isSite bool) (b bool, index []string) {
|
|
|
+func (tn *Table) CheckMultiPackageByTable(isSite bool,codeSite string) (b bool, index []string) {
|
|
|
pac := 0 //包的数量
|
|
|
val := 0 //分值
|
|
|
index = []string{} //存储分包,使用tbale.SortKV的key和value使用正则等处理对值进行判断
|
|
@@ -1972,20 +1972,20 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool) (b bool, index []string)
|
|
|
tn.BlockPackage.AddKey(v, bp) //table子包数组
|
|
|
}
|
|
|
}
|
|
|
- isGoonNext = tn.manyPackageProcessByIndex(index, standIndex_pos,isSite) //多包处理,处理不同情况下的分包
|
|
|
+ isGoonNext = tn.manyPackageProcessByIndex(index, standIndex_pos,isSite,codeSite) //多包处理,处理不同情况下的分包
|
|
|
}
|
|
|
} else {
|
|
|
isGoonNext = true
|
|
|
}
|
|
|
if isGoonNext { //没有处理成数组的情况下,继续调用正文查找分包的方法
|
|
|
- tn.isGoonNext(isSite)
|
|
|
+ tn.isGoonNext(isSite,codeSite)
|
|
|
}
|
|
|
//查找分包中的中标人排序
|
|
|
if tn.BlockPackage != nil && tn.BlockPackage.Keys != nil && len(tn.BlockPackage.Keys) > 0 {
|
|
|
for _, v := range tn.BlockPackage.Keys {
|
|
|
vv, ok := tn.BlockPackage.Map[v].(*u.BlockPackage)
|
|
|
if ok && (vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0) {
|
|
|
- vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2,isSite)
|
|
|
+ vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2,isSite,codeSite)
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -1993,7 +1993,7 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool) (b bool, index []string)
|
|
|
}
|
|
|
|
|
|
//多包处理,处理不同情况下的分包
|
|
|
-func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,isSite bool) (isGoonNext bool) {
|
|
|
+func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,isSite bool,codeSite string) (isGoonNext bool) {
|
|
|
if len(index) == 1 { //是一个的情况
|
|
|
if len(tn.SortKV.Keys) < 10 && tn.ColNum < 10 && tn.RowNum < 4 { //table带排序的KV值小于10并且小于10列和小于4行
|
|
|
beq := true
|
|
@@ -2034,7 +2034,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
|
|
|
if val, bvs := v1.([]string); bvs {
|
|
|
if len(val) <= len(index) { //table.SortKV.Map.value数组小于等于分包index
|
|
|
for k, v := range val {
|
|
|
- tn.assemblePackage(k1, v, index[k],isSite) //组装解析到的分包
|
|
|
+ tn.assemblePackage(k1, v, index[k],isSite,codeSite) //组装解析到的分包
|
|
|
}
|
|
|
} else {
|
|
|
for sk1, sv2 := range index {
|
|
@@ -2052,12 +2052,12 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- tn.assemblePackage(k1, v, sv2,isSite)
|
|
|
+ tn.assemblePackage(k1, v, sv2,isSite,codeSite)
|
|
|
}
|
|
|
}
|
|
|
//删除子包的kv
|
|
|
//u.Debug("----==1==-------", k1)
|
|
|
- k1tags := u.GetTags(k1,isSite) //取得匹配
|
|
|
+ k1tags := u.GetTags(k1,isSite,codeSite) //取得匹配
|
|
|
//if !(len(k1tags) > 0 && k1tags[0].Value == "采购单位") {
|
|
|
// tn.SortKV.RemoveKey(k1)
|
|
|
//}
|
|
@@ -2067,7 +2067,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
|
|
|
}
|
|
|
} else if val, bvs := v1.(string); bvs && len(index) == 1 {
|
|
|
//删除子包的kv
|
|
|
- kvTags, _ := CommonDataAnaly(k1, "", "", val,isSite)
|
|
|
+ kvTags, _ := CommonDataAnaly(k1, "", "", val,isSite,codeSite)
|
|
|
for kvTag_k, kvTag_v := range kvTags {
|
|
|
hasValid := false
|
|
|
for _, kvTag_vv := range kvTag_v {
|
|
@@ -2081,7 +2081,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
|
|
|
}
|
|
|
if !(len(kvTags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(kvTag_k)) {
|
|
|
tn.SortKV.RemoveKey(k1)
|
|
|
- tn.assemblePackage(k1, val, index[0],isSite)
|
|
|
+ tn.assemblePackage(k1, val, index[0],isSite,codeSite)
|
|
|
//log.Println("remove", k1, val)
|
|
|
}
|
|
|
}
|
|
@@ -2093,7 +2093,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
|
|
|
}
|
|
|
|
|
|
//没有处理成数组的情况下,继续调用正文查找分包的方法
|
|
|
-func (tn *Table) isGoonNext(isSite bool) {
|
|
|
+func (tn *Table) isGoonNext(isSite bool,codeSite string) {
|
|
|
blockPackage := map[string]*u.BlockPackage{}
|
|
|
for _, k := range tn.SortKV.Keys {
|
|
|
if excludeKey.MatchString(k) || strings.Contains(k, "批复") {
|
|
@@ -2107,7 +2107,7 @@ func (tn *Table) isGoonNext(isSite bool) {
|
|
|
} else {
|
|
|
str += fmt.Sprintf("%s:%s\n", nk, v)
|
|
|
}
|
|
|
- b, _ := divisionPackageChild(&blockPackage, str, tn.Tag, false, false,isSite) //分块之后分包
|
|
|
+ b, _ := divisionPackageChild(&blockPackage, str, tn.Tag, false, false,isSite,codeSite) //分块之后分包
|
|
|
if b && len(blockPackage) > 0 {
|
|
|
tn.BPackage = true
|
|
|
for mk, mv := range blockPackage {
|
|
@@ -2314,13 +2314,13 @@ func initCheckMultiPackageByTable(tn *Table, key_index int, index []string, inde
|
|
|
}
|
|
|
|
|
|
//组装解析到的分包,//key如果匹配到抽取关键词就添加到table.SortKV
|
|
|
-func (tn *Table) assemblePackage(k1, v1, key string,isSite bool) {
|
|
|
+func (tn *Table) assemblePackage(k1, v1, key string,isSite bool,codeSite string) {
|
|
|
bp := tn.BlockPackage.Map[key].(*u.BlockPackage)
|
|
|
if bp.TableKV == nil {
|
|
|
bp.TableKV = u.NewJobKv()
|
|
|
}
|
|
|
if v1 != "" {
|
|
|
- kvTags, _ := CommonDataAnaly(k1, "中标情况", "", v1,isSite) //匹配抽取关键词
|
|
|
+ kvTags, _ := CommonDataAnaly(k1, "中标情况", "", v1,isSite,codeSite) //匹配抽取关键词
|
|
|
for k3, v3 := range kvTags {
|
|
|
bp.TableKV.KvTags[k3] = append(bp.TableKV.KvTags[k3], v3...)
|
|
|
}
|
|
@@ -2477,7 +2477,7 @@ func replPkgConfusion(v1 string) string {
|
|
|
}
|
|
|
|
|
|
//对td中的值,进行再处理
|
|
|
-func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat,isSite bool) {
|
|
|
+func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat,isSite bool,codeSite string) {
|
|
|
//处理表格中的联系人信息
|
|
|
indexMap := contactFormat.IndexMap
|
|
|
matchMap := contactFormat.MatchMap
|
|
@@ -2549,7 +2549,7 @@ L:
|
|
|
//和|以?及|与|、多个词和在一起
|
|
|
jumpNextTd, thisTrHasMatch = tn.tdsMultipleWords(jumpNextTd, td, td_index, tr, thisTrHasMatch, indexMap)
|
|
|
//分块之后的kv
|
|
|
- thisTdKvs := kvAfterDivideBlock("", td.Text, 3, tn.TableResult.RuleBlock,isSite)
|
|
|
+ thisTdKvs := kvAfterDivideBlock("", td.Text, 3, tn.TableResult.RuleBlock,isSite,codeSite)
|
|
|
if len(thisTdKvs) == 0 {
|
|
|
thisTdKvs = tn.tdkv(td) //获取冒号kv
|
|
|
}
|
|
@@ -2577,7 +2577,7 @@ L:
|
|
|
//都为正序查询
|
|
|
if allAscFind && tdAscFind {
|
|
|
//都为正序查询处理
|
|
|
- matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex = tn.asdFind(td_k, matchCount, weightMap, matchMap, td, thisTrHasMatch, td_kv, indexMap, iscontinue, reCreate, thidTdIndex,isSite)
|
|
|
+ matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex = tn.asdFind(td_k, matchCount, weightMap, matchMap, td, thisTrHasMatch, td_kv, indexMap, iscontinue, reCreate, thidTdIndex,isSite,codeSite)
|
|
|
}
|
|
|
if iscontinue {
|
|
|
continue
|
|
@@ -2647,7 +2647,7 @@ L:
|
|
|
}
|
|
|
thisTrHasMatch = true
|
|
|
//modle
|
|
|
- modle(thisTdKvs, td, myContactType, td_k, td_v, &contactTypeTagMap, tn, &weightMap, tr_index, td_index,isSite)
|
|
|
+ modle(thisTdKvs, td, myContactType, td_k, td_v, &contactTypeTagMap, tn, &weightMap, tr_index, td_index,isSite,codeSite)
|
|
|
}
|
|
|
}
|
|
|
//u.Debug(td.SortKV.Map)
|
|
@@ -2675,7 +2675,7 @@ L:
|
|
|
}
|
|
|
|
|
|
//modle
|
|
|
-func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactTypeTagMap *map[string]map[string][]interface{}, tn *Table, weightMap *map[string]map[string]interface{}, tr_index, td_index int,isSite bool) {
|
|
|
+func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactTypeTagMap *map[string]map[string][]interface{}, tn *Table, weightMap *map[string]map[string]interface{}, tr_index, td_index int,isSite bool,codeSite string) {
|
|
|
modle := 0
|
|
|
if len(thisTdKvs) == 1 {
|
|
|
if regReplAllSpace.ReplaceAllString(thisTdKvs[0].Value, "") == "" {
|
|
@@ -2690,7 +2690,7 @@ func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactT
|
|
|
} else {
|
|
|
//
|
|
|
if !strings.HasSuffix(td_k, "方式") {
|
|
|
- kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts,isSite)
|
|
|
+ kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts,isSite,codeSite)
|
|
|
if len(kvTags) == 1 {
|
|
|
tagVal, _ := u.FirstKeyValueInMap(kvTags)
|
|
|
if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
|
|
@@ -2717,7 +2717,7 @@ func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactT
|
|
|
}
|
|
|
|
|
|
//都为正序查询
|
|
|
-func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[string]interface{}, matchMap map[string]map[string]bool, td *TD, thisTrHasMatch bool, td_kv *u.Kv, indexMap map[int]string, iscontinue bool, reCreate bool, thidTdIndex int,isSite bool) (int, map[string]map[string]interface{}, map[string]map[string]bool, bool, map[int]string, bool, bool, int) {
|
|
|
+func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[string]interface{}, matchMap map[string]map[string]bool, td *TD, thisTrHasMatch bool, td_kv *u.Kv, indexMap map[int]string, iscontinue bool, reCreate bool, thidTdIndex int,isSite bool,codeSite string) (int, map[string]map[string]interface{}, map[string]map[string]bool, bool, map[int]string, bool, bool, int) {
|
|
|
for _, k := range HasOrderContactType(td_k) { //采购单位,代理机构
|
|
|
if !ContactType[k].MatchString(td_k) { //没有匹配到采购单位,代理机构
|
|
|
continue
|
|
@@ -2728,9 +2728,9 @@ func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[s
|
|
|
}
|
|
|
//匹配到进行处理
|
|
|
if ContactInfoVagueReg.MatchString(td_k) {
|
|
|
- thisTrHasMatch = tn.matchContactType(&matchMap, k, td_k, td_kv.Value, td, &weightMap, thisTrHasMatch,isSite)
|
|
|
+ thisTrHasMatch = tn.matchContactType(&matchMap, k, td_k, td_kv.Value, td, &weightMap, thisTrHasMatch,isSite,codeSite)
|
|
|
} else if k == "采购单位" { //打标签,权重高的重新覆盖
|
|
|
- kvTags := GetKvTags([]*u.Kv{td_kv}, "", []string{"采购单位"},isSite)
|
|
|
+ kvTags := GetKvTags([]*u.Kv{td_kv}, "", []string{"采购单位"},isSite,codeSite)
|
|
|
tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
|
|
|
if tagVal == k {
|
|
|
if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 {
|
|
@@ -2781,13 +2781,13 @@ func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[s
|
|
|
}
|
|
|
|
|
|
//匹配到进行处理
|
|
|
-func (tn *Table) matchContactType(matchMap *map[string]map[string]bool, k string, td_k string, td_v string, td *TD, weightMap *map[string]map[string]interface{}, thisTrHasMatch bool,isSite bool) bool {
|
|
|
+func (tn *Table) matchContactType(matchMap *map[string]map[string]bool, k string, td_k string, td_v string, td *TD, weightMap *map[string]map[string]interface{}, thisTrHasMatch bool,isSite bool,codeSite string) bool {
|
|
|
if (*matchMap)[k] == nil {
|
|
|
(*matchMap)[k] = map[string]bool{}
|
|
|
}
|
|
|
isAddToMatchMap := true
|
|
|
if !strings.HasSuffix(td_k, "方式") {
|
|
|
- kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts,isSite)
|
|
|
+ kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts,isSite,codeSite)
|
|
|
if len(kvTags) == 1 {
|
|
|
tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
|
|
|
if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
|