|
@@ -127,7 +127,7 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1 []string,
|
|
|
} else if sv, sok := v.([]string); sok { //是数组先默认取第一个
|
|
|
v1 = sv[0]
|
|
|
}
|
|
|
- //对值单位的处理
|
|
|
+ //对值单位的处理 (预算|费|价|额|规模|投资)
|
|
|
if moneyreg.MatchString(tk) {
|
|
|
v1 += GetMoneyUnit(tk, v1)
|
|
|
}
|
|
@@ -196,57 +196,11 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1 []string,
|
|
|
//对解析后的表格的kv进行过滤
|
|
|
func (table *Table) KVFilter() {
|
|
|
//1.标准化值查找
|
|
|
- //2.对数组的处理
|
|
|
- //3.对分包的处理
|
|
|
- //4.对KV的处理
|
|
|
- //判断表格是否有用,调用abandontable正则数组进行判断
|
|
|
- //遍历每一行
|
|
|
- for _, tr := range table.TRs {
|
|
|
- for _, td := range tr.TDs {
|
|
|
- //fmt.Println(td.BH, td.MustBH, td.Val, td.SortKV.Map)
|
|
|
- bc := false
|
|
|
- if !td.BH {
|
|
|
- //表头是否是无用内容
|
|
|
- if td.HeadTd != nil {
|
|
|
- bc, _, _, _, _ = CheckCommon(td.HeadTd.Val, "abandontable")
|
|
|
- }
|
|
|
- }
|
|
|
- if !bc {
|
|
|
- //td元素有内嵌kv,遍历放入table的Kv中
|
|
|
- if len(td.SortKV.Keys) > 0 {
|
|
|
- for _, k3 := range td.SortKV.Keys {
|
|
|
- _val := td.SortKV.Map[k3]
|
|
|
- //thisFlag := false
|
|
|
- if td.HeadTd != nil && len([]rune(k3)) < 4 {
|
|
|
- k3 = td.HeadTd.Val + k3
|
|
|
- }
|
|
|
- if table.SortKV.Map[k3] == nil {
|
|
|
- //u.Debug(k3, _val)
|
|
|
- //if !thisFlag || (thisFlag && table.SortKV.Map[k3] == nil) {
|
|
|
- table.SortKV.AddKey(k3, _val)
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- //td有子表格的处理
|
|
|
- //u.Debug(td.BH, td.Val, td.SonTableResult)
|
|
|
- if td.SonTableResult != nil {
|
|
|
- //u.Debug(td.SonTableResult.SortKV.Map, "-------", td.SonTableResult.Tabs)
|
|
|
- for _, k3 := range td.SonTableResult.SortKV.Keys {
|
|
|
- if table.StandKV[k3] == "" || td.SonTableResult.SortKVWeight[k3] > table.StandKVWeight[k3] {
|
|
|
- table.StandKV[k3] = qutil.ObjToString(td.SonTableResult.SortKV.Map[k3])
|
|
|
- table.StandKVWeight[k3] = td.SonTableResult.SortKVWeight[k3]
|
|
|
- }
|
|
|
- }
|
|
|
- //中标候选人排序
|
|
|
- if table.WinnerOrder == nil || len(table.WinnerOrder) == 0 {
|
|
|
- table.WinnerOrder = td.SonTableResult.WinnerOrder
|
|
|
- } else {
|
|
|
- winnerOrderEntity.Merge(table.WinnerOrder, td.SonTableResult.WinnerOrder)
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+ // 2.对数组的处理
|
|
|
+ // 3.对分包的处理
|
|
|
+ // 4.对KV的处理
|
|
|
+ // 判断表格是否有用,调用abandontable正则数组进行判断
|
|
|
+ table.analyTdKV()//1.td元素有内嵌kv,遍历放入table的Kv中2.td有子表格的处理,中标候选人排序
|
|
|
as := NewSortMap()
|
|
|
//表格描述处理,对成交结果的处理
|
|
|
for _, k := range table.SortKV.Keys {
|
|
@@ -263,7 +217,7 @@ func (table *Table) KVFilter() {
|
|
|
//u.Debug(k, v)
|
|
|
if _, ok := v.(string); ok {
|
|
|
k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
|
|
|
- k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v)
|
|
|
+ k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v)//对key标准化处理,没有找到会走中标
|
|
|
//u.Debug(k, v, k1, w1, v1, tag, b)
|
|
|
if b {
|
|
|
//降低冒号值的权重
|
|
@@ -450,6 +404,56 @@ func (table *Table) KVFilter() {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+//1.td元素有内嵌kv,遍历放入table的Kv中2.td有子表格的处理,中标候选人排序
|
|
|
+func (table *Table) analyTdKV() {
|
|
|
+ //遍历每一行
|
|
|
+ for _, tr := range table.TRs {
|
|
|
+ for _, td := range tr.TDs {
|
|
|
+ //fmt.Println(td.BH, td.MustBH, td.Val, td.SortKV.Map)
|
|
|
+ bc := false
|
|
|
+ if !td.BH {
|
|
|
+ //表头是否是无用内容
|
|
|
+ if td.HeadTd != nil {
|
|
|
+ bc, _, _, _, _ = CheckCommon(td.HeadTd.Val, "abandontable")
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if !bc {
|
|
|
+ //td元素有内嵌kv,遍历放入table的Kv中
|
|
|
+ if len(td.SortKV.Keys) > 0 {
|
|
|
+ for _, k3 := range td.SortKV.Keys {
|
|
|
+ _val := td.SortKV.Map[k3]
|
|
|
+ //thisFlag := false
|
|
|
+ if td.HeadTd != nil && len([]rune(k3)) < 4 {
|
|
|
+ k3 = td.HeadTd.Val + k3
|
|
|
+ }
|
|
|
+ if table.SortKV.Map[k3] == nil {
|
|
|
+ //u.Debug(k3, _val)
|
|
|
+ //if !thisFlag || (thisFlag && table.SortKV.Map[k3] == nil) {
|
|
|
+ table.SortKV.AddKey(k3, _val)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //td有子表格的处理
|
|
|
+ //u.Debug(td.BH, td.Val, td.SonTableResult)
|
|
|
+ if td.SonTableResult != nil {
|
|
|
+ //u.Debug(td.SonTableResult.SortKV.Map, "-------", td.SonTableResult.Tabs)
|
|
|
+ for _, k3 := range td.SonTableResult.SortKV.Keys {
|
|
|
+ if table.StandKV[k3] == "" || td.SonTableResult.SortKVWeight[k3] > table.StandKVWeight[k3] {
|
|
|
+ table.StandKV[k3] = qutil.ObjToString(td.SonTableResult.SortKV.Map[k3])
|
|
|
+ table.StandKVWeight[k3] = td.SonTableResult.SortKVWeight[k3]
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //中标候选人排序
|
|
|
+ if table.WinnerOrder == nil || len(table.WinnerOrder) == 0 {
|
|
|
+ table.WinnerOrder = td.SonTableResult.WinnerOrder
|
|
|
+ } else {
|
|
|
+ winnerOrderEntity.Merge(table.WinnerOrder, td.SonTableResult.WinnerOrder)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
|
|
|
//表格结果合并到父表格集中
|
|
|
func (table *Table) MergerToTableresult() {
|
|
@@ -646,9 +650,17 @@ func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
|
|
|
if trs.Size() == 0 {
|
|
|
trs = table.Goquery.ChildrenFiltered("tr")
|
|
|
}
|
|
|
- //num := 0
|
|
|
- //遍历tr数组
|
|
|
- trs.Each(func(n int, sel *goquery.Selection) {
|
|
|
+ //遍历节点,初始化table 结构
|
|
|
+ table.createTabe(trs)
|
|
|
+ //重置行列
|
|
|
+ table.ComputeRowColSpan()
|
|
|
+ //对table结构体进行整体解析处理
|
|
|
+ ts := table.AnalyTables(contactFormat)
|
|
|
+ return ts
|
|
|
+}
|
|
|
+//遍历节点,初始化table 结构体
|
|
|
+func (table *Table) createTabe(trs *goquery.Selection) {
|
|
|
+ trs.Each(func(n int, sel *goquery.Selection) {
|
|
|
//隐藏行不处理
|
|
|
if IsHide(sel) {
|
|
|
return
|
|
@@ -663,7 +675,7 @@ func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
|
|
|
return
|
|
|
}
|
|
|
//进入每一个单元格
|
|
|
- td := NewTD(selm, TR, table)
|
|
|
+ td := NewTD(selm, TR, table) //初始化td,kv处理,td中有table处理,td的方向
|
|
|
//num++
|
|
|
TR.AddTD(td)
|
|
|
if td.Val != "" { //删除一个tr,tr中所有td是空值的
|
|
@@ -675,22 +687,74 @@ func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
|
|
|
table.AddTR(TR)
|
|
|
}
|
|
|
})
|
|
|
- //重置行列
|
|
|
- table.ComputeRowColSpan()
|
|
|
- // for n, tr := range table.TRs {
|
|
|
- // for m, td := range tr.TDs {
|
|
|
- // qutil.Debug(td.BH, n, m, td.Text, td.StartRow, td.EndRow, td.StartCol, td.EndCol)
|
|
|
- // }
|
|
|
- // }
|
|
|
+}
|
|
|
|
|
|
+//对table进行整体解析处理
|
|
|
+func (table *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
|
|
|
+ ts := table.tableSubDemolitionTable()//分包,拆表
|
|
|
+ for n, table := range ts {
|
|
|
+ //处理每个table
|
|
|
+ if len(table.TRs) > 0 {
|
|
|
+ //删除尾部空白行
|
|
|
+ table.deleteTrimTr()
|
|
|
+ //table.Print()
|
|
|
+ //校对表格
|
|
|
+ table.Adjust()
|
|
|
+ //查找表格的标签,table.Tag字段
|
|
|
+ table.FindTag()
|
|
|
+ //log.Println(table.TableResult.Id, table.Html)
|
|
|
+ //分割表格
|
|
|
+ table.bSplit(n, ts)
|
|
|
+ //对没有表头表格的处理
|
|
|
+ _, _, b := CheckMultiPackage(table.Tag, "")
|
|
|
+ if b {
|
|
|
+ table.StandKV["项目名称"] = table.Tag
|
|
|
+ table.StandKVWeight["项目名称"] = -100
|
|
|
+ }
|
|
|
+ table.TdContactFormat(contactFormat) //contactFormat,处理采购单位,代理机构
|
|
|
+ //开始查找kv,核心模块,table.SortKV
|
|
|
+ table.FindKV()
|
|
|
+ //table中抽取品牌,table.BrandData
|
|
|
+ if u.IsBrandGoods {
|
|
|
+ table.analyBrand()
|
|
|
+ }
|
|
|
+ //判断是否是多包,并处理分包的//遍历td分块
|
|
|
+ table.CheckMultiPackageByTable()
|
|
|
+ //str := "\n"
|
|
|
+ //for k, v := range table.StandKV {
|
|
|
+ // str += fmt.Sprintf("_==___%s:%v\n", k, v)
|
|
|
+ // if table.TableResult.SortKV.Map[k] == nil {
|
|
|
+ // table.TableResult.SortKV.AddKey(k, v)
|
|
|
+ // table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+ res, _, _, _, _ := CheckCommon(table.Tag, "abandontable")
|
|
|
+ if !res {
|
|
|
+ //过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
|
|
|
+ table.KVFilter()
|
|
|
+ }
|
|
|
+ for k, v := range table.StandKV {//过滤后的标准化kv
|
|
|
+ if table.TableResult.SortKV.Map[k] == nil {
|
|
|
+ table.TableResult.SortKV.AddKey(k, v)
|
|
|
+ table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //u.Debug(str)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return ts
|
|
|
+}
|
|
|
+//分包,拆表
|
|
|
+func (table *Table) tableSubDemolitionTable() []*Table {
|
|
|
tm := []map[string]interface{}{}
|
|
|
tmk := map[string]bool{}
|
|
|
tmn := map[int]map[string]interface{}{}
|
|
|
for rownum, tr := range table.TRs {
|
|
|
- if len(tr.TDs) == 1 && table.ColNum > 1 {
|
|
|
- td := tr.TDs[0]
|
|
|
+ if len(tr.TDs) == 1 && table.ColNum > 1 { //tr里面有一列,table里面有多列
|
|
|
+ td := tr.TDs[0] //取每行第一个td
|
|
|
+ //td开始列等于0 && td结束列+1等于table列数 && td长度大于1小于50
|
|
|
if td.StartCol == 0 && td.EndCol+1 == table.ColNum && len([]rune(td.Val)) > 1 && len([]rune(td.Val)) < 50 {
|
|
|
- con, m1, b := CheckMultiPackage(td.Val, "")
|
|
|
+ con, m1, b := CheckMultiPackage(td.Val, "") //判断分包
|
|
|
if b {
|
|
|
for k, _ := range m1 {
|
|
|
numstr := u.PackageNumberConvert(k)
|
|
@@ -735,103 +799,47 @@ func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
|
|
|
} else {
|
|
|
ts = append(ts, table)
|
|
|
}
|
|
|
- for n, table := range ts {
|
|
|
- if len(table.TRs) > 0 {
|
|
|
- //删除尾部空行
|
|
|
- for len(table.TRs) > 0 {
|
|
|
- npos := len(table.TRs)
|
|
|
- tailTR := table.TRs[npos-1] //最后一个tr
|
|
|
- bspace := true
|
|
|
- for _, v := range tailTR.TDs {
|
|
|
- if v.Val != "" || v.SonTableResult != nil || len(v.SortKV.Keys) > 0 {
|
|
|
- bspace = false
|
|
|
- break
|
|
|
+ return ts
|
|
|
+}
|
|
|
+//分割表格
|
|
|
+func (table *Table) bSplit( n int, ts []*Table) {
|
|
|
+ if table.BSplit {
|
|
|
+ if !table.BHeader && n > 0 {
|
|
|
+ for i := n - 1; i > -1; i-- {
|
|
|
+ if ts[i].BHeader {
|
|
|
+ if ts[i].BFirstRow {
|
|
|
+ //取第一行插入到
|
|
|
+ table.InsertTR(ts[i].TRs[0])
|
|
|
+ table.Adjust()
|
|
|
}
|
|
|
- }
|
|
|
- if bspace {
|
|
|
- table.TRs = table.TRs[:npos-1]
|
|
|
- } else {
|
|
|
break
|
|
|
}
|
|
|
}
|
|
|
- //table.Print()
|
|
|
- //校对表格
|
|
|
- table.Adjust()
|
|
|
- //查找表格的标签
|
|
|
- table.FindTag()
|
|
|
- //log.Println(table.TableResult.Id, table.Html)
|
|
|
- //分割表格
|
|
|
- if table.BSplit {
|
|
|
- if !table.BHeader && n > 0 {
|
|
|
- for i := n - 1; i > -1; i-- {
|
|
|
- if ts[i].BHeader {
|
|
|
- if ts[i].BFirstRow {
|
|
|
- //取第一行插入到
|
|
|
- table.InsertTR(ts[i].TRs[0])
|
|
|
- table.Adjust()
|
|
|
- }
|
|
|
- break
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- //对没有表头表格的处理
|
|
|
- _, _, b := CheckMultiPackage(table.Tag, "")
|
|
|
- if b {
|
|
|
- table.StandKV["项目名称"] = table.Tag
|
|
|
- table.StandKVWeight["项目名称"] = -100
|
|
|
- }
|
|
|
- table.TdContactFormat(contactFormat)
|
|
|
- //开始查找kv,核心模块
|
|
|
- table.FindKV()
|
|
|
- //table中抽取品牌
|
|
|
- if u.IsBrandGoods {
|
|
|
- table.analyBrand()
|
|
|
- }
|
|
|
- //判断是否是多包,并处理分包的
|
|
|
- table.CheckMultiPackageByTable()
|
|
|
- str := "\n"
|
|
|
- for k, v := range table.StandKV {
|
|
|
- str += fmt.Sprintf("_==___%s:%v\n", k, v)
|
|
|
- if table.TableResult.SortKV.Map[k] == nil {
|
|
|
- table.TableResult.SortKV.AddKey(k, v)
|
|
|
- table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
|
|
|
- }
|
|
|
- }
|
|
|
- res, _, _, _, _ := CheckCommon(table.Tag, "abandontable")
|
|
|
- if !res {
|
|
|
- //过滤、标准化、合并kv
|
|
|
- table.KVFilter()
|
|
|
- }
|
|
|
- str = "\n"
|
|
|
- for k, v := range table.StandKV {
|
|
|
- str += fmt.Sprintf("_____%s:%v\n", k, v)
|
|
|
- if table.TableResult.SortKV.Map[k] == nil {
|
|
|
- table.TableResult.SortKV.AddKey(k, v)
|
|
|
- table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
|
|
|
- }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+//删除尾部空白行
|
|
|
+func (table *Table) deleteTrimTr() {
|
|
|
+ for len(table.TRs) > 0 {
|
|
|
+ npos := len(table.TRs)
|
|
|
+ tailTR := table.TRs[npos-1] //最后一个tr,取最后一行
|
|
|
+ bspace := true
|
|
|
+ for _, v := range tailTR.TDs {
|
|
|
+ if v.Val != "" || v.SonTableResult != nil || len(v.SortKV.Keys) > 0 {
|
|
|
+ bspace = false
|
|
|
+ break
|
|
|
}
|
|
|
- //u.Debug(str)
|
|
|
+ }
|
|
|
+ //删除尾部空行,是空行的话就删除
|
|
|
+ if bspace {
|
|
|
+ table.TRs = table.TRs[:npos-1]
|
|
|
+ } else {
|
|
|
+ break
|
|
|
}
|
|
|
}
|
|
|
- return ts
|
|
|
}
|
|
|
-
|
|
|
+//校对表格
|
|
|
func (table *Table) Adjust() {
|
|
|
- table.TDNum = func() int {
|
|
|
- n := 0
|
|
|
- for _, tr := range table.TRs {
|
|
|
- n += len(tr.TDs)
|
|
|
- }
|
|
|
- return n
|
|
|
- }()
|
|
|
- //有多少行
|
|
|
- table.RowNum = len(table.TRs)
|
|
|
- // for k1, tr := range table.TRs {
|
|
|
- // for k2, td := range tr.TDs {
|
|
|
- // qutil.Debug(k1, k2, td.Val, td.Rowspan, td.Colspan, td.ColPos, tr.RowPos)
|
|
|
- // }
|
|
|
- // }
|
|
|
//计算行列起止位置,跨行跨列处理
|
|
|
table.ComputeRowColSpan()
|
|
|
// for k1, tr := range table.TRs {
|
|
@@ -891,10 +899,12 @@ func (table *Table) Adjust() {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-//计算行/列表格的结束位置 StartRow=0 EndRow=0
|
|
|
+//计算行/列表格的结束位置 StartRow=0 EndRow=0,table.TDNum td个数 table.RowNum 行数
|
|
|
func (table *Table) ComputeRowColSpan() {
|
|
|
+ n := 0//td总个数
|
|
|
mapRC := map[int]map[int]int{} //记录第几行pos,起始列对应的合并值
|
|
|
for k, v := range table.TRs {
|
|
|
+ n += len(v.TDs)//每行的td总数相加
|
|
|
nk := 0 //nk列的起始,k行的起始||如果有合并,起始就不是0
|
|
|
ball := true
|
|
|
rowspans := v.TDs[0].Rowspan //某一行第一个td的rowspan
|
|
@@ -956,12 +966,14 @@ func (table *Table) ComputeRowColSpan() {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+ table.TDNum = n//td总个数
|
|
|
+ table.RowNum = len(table.TRs)//tr总行数
|
|
|
}
|
|
|
|
|
|
// fmtkey builds a composite key of the form "<t>_<start>_<end>".
func fmtkey(t string, start, end int) string {
	const layout = "%s_%d_%d"
	return fmt.Sprintf(layout, t, start, end)
}
|
|
|
-
|
|
|
+//查找每个table的标签,如果有标签可按标签处理,否则根据表格去判断
|
|
|
func (table *Table) FindTag() {
|
|
|
//查找每个table的标签,如果有标签可按标签处理,否则根据表格去判断
|
|
|
if table.Tag != "" {
|
|
@@ -1039,7 +1051,7 @@ func (table *Table) ComputeRowColIsKeyRation() {
|
|
|
}
|
|
|
for _, td := range tr.TDs {
|
|
|
if ball {
|
|
|
- td.BH = true
|
|
|
+ //td.BH = true
|
|
|
td.KeyDirect = 1
|
|
|
td.KVDirect = 2
|
|
|
}
|
|
@@ -1053,7 +1065,7 @@ func (table *Table) ComputeRowColIsKeyRation() {
|
|
|
for _, td1 := range tr1.TDs {
|
|
|
if td1.StartCol == 0 {
|
|
|
if !MoneyReg.MatchString(td1.Val) {
|
|
|
- td1.BH = true
|
|
|
+ //td1.BH = true
|
|
|
td1.KeyDirect = 2
|
|
|
td1.KVDirect = 1
|
|
|
}
|
|
@@ -1067,7 +1079,7 @@ func (table *Table) ComputeRowColIsKeyRation() {
|
|
|
bkeyfirstrow = true
|
|
|
for _, td := range tr.TDs {
|
|
|
if !MoneyReg.MatchString(td.Val) {
|
|
|
- td.BH = true
|
|
|
+ //td.BH = true
|
|
|
td.KeyDirect = 1
|
|
|
td.KVDirect = 2
|
|
|
}
|
|
@@ -1169,7 +1181,7 @@ func (table *Table) ComputeRowColIsKeyRation() {
|
|
|
if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
|
|
|
td.KeyDirect = 1
|
|
|
td.KVDirect = 2
|
|
|
- td.BH = true
|
|
|
+ //td.BH = true
|
|
|
}
|
|
|
}
|
|
|
} else {
|
|
@@ -1186,7 +1198,7 @@ func (table *Table) ComputeRowColIsKeyRation() {
|
|
|
if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
|
|
|
td.KeyDirect = 2
|
|
|
td.KVDirect = 1
|
|
|
- td.BH = true
|
|
|
+ //td.BH = true
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -1338,7 +1350,7 @@ func (table *Table) FindKV() {
|
|
|
// qutil.Debug("----", "td.BH:", td.BH, "KVDirect:", td.KVDirect, "Val:", td.Val, "direct:", direct, "vdirect:", vdirect)
|
|
|
// }
|
|
|
if !td.BH && td.KVDirect < 3 {
|
|
|
- if !table.FindTdVal(td, direct, vdirect) {
|
|
|
+ if !table.FindTdVal(td, direct, vdirect) {//table.FindTdVal()存储了table.SortKV
|
|
|
if !table.FindTdVal(td, vdirect, direct) {
|
|
|
//都识别不到时,对第一、二中标候选人的处理
|
|
|
bo, res := GetBidOrder(td, bodirect, sort)
|
|
@@ -1758,6 +1770,7 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
|
|
|
key_index := -1
|
|
|
hasPkgTd := map[string]bool{}
|
|
|
for in, k := range tn.SortKV.Keys {
|
|
|
+ //涉及包号|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)就跳过
|
|
|
if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) {
|
|
|
continue
|
|
|
}
|
|
@@ -1802,7 +1815,7 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
|
|
|
}
|
|
|
}
|
|
|
} else if v1, ok := v.(string); ok {
|
|
|
- v1 = replPkgConfusion(v1)
|
|
|
+ v1 = replPkgConfusion(v1)//替换分包中混淆的词
|
|
|
if len([]rune(v1)) < 8 && !moneyNum.MatchString(v1) && FindVal_1.MatchString(v1) {
|
|
|
key_index = in
|
|
|
index = append(index, FindVal_1.FindString(v1))
|
|
@@ -2291,7 +2304,7 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) {
|
|
|
LS:
|
|
|
for _, tr := range tn.TRs {
|
|
|
for td_index, td := range tr.TDs {
|
|
|
- thisTdKvs := colonkvEntity.GetKvs(td.Text, "", 2)
|
|
|
+ thisTdKvs := colonkvEntity.GetKvs(td.Text, tn.Desc, 2)//获取有序的kv
|
|
|
if len(thisTdKvs) == 0 {
|
|
|
tdValue := regReplAllSpace.ReplaceAllString(td.Text, "")
|
|
|
if tdValue != "" && len([]rune(tdValue)) < 10 {
|
|
@@ -2304,7 +2317,7 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) {
|
|
|
if len(thisTdKvs) != 1 {
|
|
|
continue
|
|
|
}
|
|
|
- //采购人在联系人、电话后面的处理
|
|
|
+ //采购人在联系人、电话后面的处理//采购单位,代理机构
|
|
|
td_k := FilterContactKey(thisTdKvs[0].Key)
|
|
|
td_k_length := len([]rune(td_k))
|
|
|
if td_k_length < 2 || td_k_length > 15 {
|
|
@@ -2321,6 +2334,7 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) {
|
|
|
if isContinue {
|
|
|
continue
|
|
|
}
|
|
|
+ //采购单位,代理机构
|
|
|
for _, k := range HasOrderContactType(td_k) {
|
|
|
if !ContactType[k].MatchString(td_k) {
|
|
|
continue
|
|
@@ -2399,9 +2413,9 @@ L:
|
|
|
jumpNextTd = false
|
|
|
}
|
|
|
///////////////////////////////////////
|
|
|
- thisTdKvs := kvAfterDivideBlock(td.Text, 3)
|
|
|
+ thisTdKvs := kvAfterDivideBlock(td.Text, 3)//分块之后的kv
|
|
|
if len(thisTdKvs) == 0 {
|
|
|
- thisTdKvs = colonkvEntity.GetKvs(td.Text, "", 2)
|
|
|
+ thisTdKvs = colonkvEntity.GetKvs(td.Text, tn.Desc, 2)
|
|
|
}
|
|
|
if len(thisTdKvs) == 0 {
|
|
|
tdValue := regReplAllSpace.ReplaceAllString(td.Text, "")
|
|
@@ -2531,7 +2545,7 @@ L:
|
|
|
}
|
|
|
if len(indexMap) == 0 {
|
|
|
prevLine := FilterSerial.ReplaceAllString(td_kv.PrevLine, "")
|
|
|
- for k, v := range ContactType {
|
|
|
+ for k, v := range ContactType {//采购单位,代理机构正则
|
|
|
if u.IsArrayHasValue(prevLine, v.FindAllString(prevLine, -1)) {
|
|
|
indexMap[thidTdIndex] = k
|
|
|
thisTrHasMatch = true
|
|
@@ -2680,80 +2694,9 @@ func (table *Table) analyBrand() {
|
|
|
lineMapArr := make(map[string]*SortMap)
|
|
|
lineMap := make(map[string]*SortMap)
|
|
|
brandRule := u.BrandRules
|
|
|
- //将val为数组和string的分开
|
|
|
- for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
|
|
|
- val := table.SortKV.Map[key]
|
|
|
- key = regReplAllSpace.ReplaceAllString(key, "")
|
|
|
- key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
|
|
|
- if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
|
|
|
- /*
|
|
|
- {
|
|
|
- "商品":["",""],
|
|
|
- "商品_"["",""],
|
|
|
- }
|
|
|
- */
|
|
|
- valArr, allempty := filterVal(realTypeVal...) //过滤数据
|
|
|
- if allempty {
|
|
|
- continue
|
|
|
- }
|
|
|
- realTypeVal = valArr
|
|
|
- line := underline.FindString(key)
|
|
|
- lineValMap1 := lineMapArr[line]
|
|
|
- // i := 1
|
|
|
- // L:
|
|
|
- // for { //去除数组空数据
|
|
|
- // last := realTypeVal[len(realTypeVal)-i]
|
|
|
- // if last == "" {
|
|
|
- // i++
|
|
|
- // if i > len(realTypeVal) {
|
|
|
- // break
|
|
|
- // }
|
|
|
- // goto L
|
|
|
- // } else {
|
|
|
- // break
|
|
|
- // }
|
|
|
- // }
|
|
|
- // dislodgeNull := realTypeVal[:(len(realTypeVal) - i + 1)] //去除数组中空数据
|
|
|
- if len(realTypeVal) > 0 {
|
|
|
- if lineValMap1 == nil {
|
|
|
- tmp := NewSortMap()
|
|
|
- tmp.AddKey(key, realTypeVal)
|
|
|
- lineMapArr[line] = tmp
|
|
|
- } else {
|
|
|
- lineValMap1.AddKey(key, realTypeVal)
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- //qutil.Debug("lineMapArr---", lineMapArr[line].Keys, lineMapArr[line].Map)
|
|
|
- } else if realTypeVal, b := val.(string); b { //val为字符串 {"数量":"1"}
|
|
|
- /*
|
|
|
- {
|
|
|
- "商品:"",名称:"",
|
|
|
- "商品_:"",名称_:"",
|
|
|
- "商品__:"",名称__:"",
|
|
|
- }
|
|
|
- */
|
|
|
- valArr, allempty := filterVal(realTypeVal) //过滤数据
|
|
|
- if allempty {
|
|
|
- continue
|
|
|
- }
|
|
|
- realTypeVal = valArr[0]
|
|
|
- line := underline.FindString(key)
|
|
|
- lineValMap2 := lineMap[line]
|
|
|
- if lineValMap2 == nil {
|
|
|
- tmp := NewSortMap()
|
|
|
- tmp.AddKey(key, realTypeVal)
|
|
|
- lineMap[line] = tmp
|
|
|
- } else {
|
|
|
- lineValMap2.AddKey(key, realTypeVal)
|
|
|
- }
|
|
|
- //qutil.Debug("lineMap---", lineMap[line].Keys, lineMap[line].Map)
|
|
|
- } else {
|
|
|
- // "_id" : ObjectId("5c2c3802a5cb26b9b78646c4")5c2b0551a5cb26b9b7cb05db否5c2a42e6a5cb26b9b763ba5a采购人:一、采购人5c2b06f5a5cb26b9b7cc4409
|
|
|
- //成交供应商排名 [map[entname:昆明合优科技有限公司 sortstr:第一中标候选人 sort:1] map[sort:2 entname:昆明厚起科技有限公司 sortstr:第二中标候选人] map[entname:云南远安科技发展有限公司 sortstr:第三中标候选人 sort:3]]
|
|
|
- //fmt.Println("err data:", key, val)
|
|
|
- }
|
|
|
- }
|
|
|
+ //初始化lineMapArr,lineMap;
|
|
|
+ initLineMapLineMapArr(table, lineMapArr, lineMap)
|
|
|
+ //添加table.BrandData
|
|
|
//处理数组数据后,匹配必须title和替换要保存的title
|
|
|
//qutil.Debug("lineMapArr----", len(lineMapArr))
|
|
|
if len(lineMapArr) > 0 {
|
|
@@ -2855,7 +2798,7 @@ func (table *Table) analyBrand() {
|
|
|
delete(finishKa, "unitprice")
|
|
|
}
|
|
|
finishData := dealArrData(maxNum, finishKa)
|
|
|
- table.BrandData = append(table.BrandData, finishData)
|
|
|
+ table.BrandData = append(table.BrandData, finishData) //修改了table.BrandData
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -2949,6 +2892,83 @@ func (table *Table) analyBrand() {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+//初始化lineMapArr,lineMap
|
|
|
+func initLineMapLineMapArr(table *Table, lineMapArr map[string]*SortMap, lineMap map[string]*SortMap) {
|
|
|
+ //将val为数组和string的分开
|
|
|
+ for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
|
|
|
+ val := table.SortKV.Map[key]
|
|
|
+ key = regReplAllSpace.ReplaceAllString(key, "")
|
|
|
+ key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
|
|
|
+ if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
|
|
|
+ /*
|
|
|
+ {
|
|
|
+ "商品":["",""],
|
|
|
+ "商品_"["",""],
|
|
|
+ }
|
|
|
+ */
|
|
|
+ valArr, allempty := filterVal(realTypeVal...) //过滤数据
|
|
|
+ if allempty {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ realTypeVal = valArr
|
|
|
+ line := underline.FindString(key)
|
|
|
+ lineValMap1 := lineMapArr[line]
|
|
|
+ // i := 1
|
|
|
+ // L:
|
|
|
+ // for { //去除数组空数据
|
|
|
+ // last := realTypeVal[len(realTypeVal)-i]
|
|
|
+ // if last == "" {
|
|
|
+ // i++
|
|
|
+ // if i > len(realTypeVal) {
|
|
|
+ // break
|
|
|
+ // }
|
|
|
+ // goto L
|
|
|
+ // } else {
|
|
|
+ // break
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // dislodgeNull := realTypeVal[:(len(realTypeVal) - i + 1)] //去除数组中空数据
|
|
|
+ if len(realTypeVal) > 0 {
|
|
|
+ if lineValMap1 == nil {
|
|
|
+ tmp := NewSortMap()
|
|
|
+ tmp.AddKey(key, realTypeVal)
|
|
|
+ lineMapArr[line] = tmp
|
|
|
+ } else {
|
|
|
+ lineValMap1.AddKey(key, realTypeVal)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //qutil.Debug("lineMapArr---", lineMapArr[line].Keys, lineMapArr[line].Map)
|
|
|
+ } else if realTypeVal, b := val.(string); b { //val为字符串 {"数量":"1"}
|
|
|
+ /*
|
|
|
+ {
|
|
|
+ "商品:"",名称:"",
|
|
|
+ "商品_:"",名称_:"",
|
|
|
+ "商品__:"",名称__:"",
|
|
|
+ }
|
|
|
+ */
|
|
|
+ valArr, allempty := filterVal(realTypeVal) //过滤数据
|
|
|
+ if allempty {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ realTypeVal = valArr[0]
|
|
|
+ line := underline.FindString(key)
|
|
|
+ lineValMap2 := lineMap[line]
|
|
|
+ if lineValMap2 == nil {
|
|
|
+ tmp := NewSortMap()
|
|
|
+ tmp.AddKey(key, realTypeVal)
|
|
|
+ lineMap[line] = tmp
|
|
|
+ } else {
|
|
|
+ lineValMap2.AddKey(key, realTypeVal)
|
|
|
+ }
|
|
|
+ //qutil.Debug("lineMap---", lineMap[line].Keys, lineMap[line].Map)
|
|
|
+ } else {
|
|
|
+ // "_id" : ObjectId("5c2c3802a5cb26b9b78646c4")5c2b0551a5cb26b9b7cb05db否5c2a42e6a5cb26b9b763ba5a采购人:一、采购人5c2b06f5a5cb26b9b7cc4409
|
|
|
+ //成交供应商排名 [map[entname:昆明合优科技有限公司 sortstr:第一中标候选人 sort:1] map[sort:2 entname:昆明厚起科技有限公司 sortstr:第二中标候选人] map[entname:云南远安科技发展有限公司 sortstr:第三中标候选人 sort:3]]
|
|
|
+ //fmt.Println("err data:", key, val)
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
|
|
|
func dealArrData(maxNum int, ka map[string][]string) []map[string]string {
|
|
|
for k2, v2 := range ka {
|
|
@@ -3252,3 +3272,7 @@ func dealNumber(val ...string) ([]string, []string) {
|
|
|
}
|
|
|
return result, unitnameArr
|
|
|
}
|
|
|
+
|
|
|
+func (tn *Table) analyProNameAndItemNumber(){
|
|
|
+
|
|
|
+}
|