|
@@ -88,7 +88,7 @@ var (
|
|
|
|
|
|
//Tg = map[string]interface{}{}
|
|
//Tg = map[string]interface{}{}
|
|
//一些表格没有表头,是空的,对值是排序的做处理对应 NullTxBid
|
|
//一些表格没有表头,是空的,对值是排序的做处理对应 NullTxBid
|
|
- NullTdReg = regexp.MustCompile("(首选|第[一二三四五1-5])(中标|成交)?(名|(候选|排序)?(人|单位|供应商))")
|
|
|
|
|
|
+ NullTdReg = regexp.MustCompile("(首选|第[一二三四五1-5])(中标|成交)?(名(称)?|(候选|排序)?(人|单位|供应商))")
|
|
NullTxtBid = "成交供应商排名"
|
|
NullTxtBid = "成交供应商排名"
|
|
projectnameReg = regexp.MustCompile("((公开)?招标)*[((第]*[一二三四五六七八九十a-zA-Z0-9]+(标段|包|标|段)[))]*$")
|
|
projectnameReg = regexp.MustCompile("((公开)?招标)*[((第]*[一二三四五六七八九十a-zA-Z0-9]+(标段|包|标|段)[))]*$")
|
|
MhSpilt = regexp.MustCompile("[::]")
|
|
MhSpilt = regexp.MustCompile("[::]")
|
|
@@ -105,6 +105,8 @@ var (
|
|
FilterSerial = regexp.MustCompile(".+[、..::,]")
|
|
FilterSerial = regexp.MustCompile(".+[、..::,]")
|
|
filterTableWror = regexp.MustCompile("班子成员")
|
|
filterTableWror = regexp.MustCompile("班子成员")
|
|
underline = regexp.MustCompile("_+$")
|
|
underline = regexp.MustCompile("_+$")
|
|
|
|
+ iswinnertabletag = regexp.MustCompile("(中标|候选人|成交|结果)")
|
|
|
|
+ nswinnertabletag = regexp.MustCompile("[评得分估]+")
|
|
)
|
|
)
|
|
|
|
|
|
//在解析时,判断表格元素是否隐藏
|
|
//在解析时,判断表格元素是否隐藏
|
|
@@ -196,11 +198,16 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1 []string,
|
|
//对解析后的表格的kv进行过滤
|
|
//对解析后的表格的kv进行过滤
|
|
func (table *Table) KVFilter() {
|
|
func (table *Table) KVFilter() {
|
|
//1.标准化值查找
|
|
//1.标准化值查找
|
|
- // 2.对数组的处理
|
|
|
|
- // 3.对分包的处理
|
|
|
|
- // 4.对KV的处理
|
|
|
|
- // 判断表格是否有用,调用abandontable正则数组进行判断
|
|
|
|
- table.analyTdKV()//1.td元素有内嵌kv,遍历放入table的Kv中2.td有子表格的处理,中标候选人排序
|
|
|
|
|
|
+ //2.对数组的处理
|
|
|
|
+ //3.对分包的处理
|
|
|
|
+ //4.对KV的处理
|
|
|
|
+ //判断表格是否有用,调用abandontable正则数组进行判断
|
|
|
|
+ //遍历每一行
|
|
|
|
+ winnertag := iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.Tag) //table标签
|
|
|
|
+ if !winnertag {
|
|
|
|
+ winnertag = iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.TableResult.BlockTag) //块标签
|
|
|
|
+ }
|
|
|
|
+ table.analyTdKV()
|
|
as := NewSortMap()
|
|
as := NewSortMap()
|
|
//表格描述处理,对成交结果的处理
|
|
//表格描述处理,对成交结果的处理
|
|
for _, k := range table.SortKV.Keys {
|
|
for _, k := range table.SortKV.Keys {
|
|
@@ -214,11 +221,10 @@ func (table *Table) KVFilter() {
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
v := table.SortKV.Map[k]
|
|
v := table.SortKV.Map[k]
|
|
- //u.Debug(k, v)
|
|
|
|
if _, ok := v.(string); ok {
|
|
if _, ok := v.(string); ok {
|
|
k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
|
|
k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
|
|
- k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v)//对key标准化处理,没有找到会走中标
|
|
|
|
- //u.Debug(k, v, k1, w1, v1, tag, b)
|
|
|
|
|
|
+ k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
|
|
|
|
+ //qutil.Debug(k, v, k1, w1, v1, tag, b)
|
|
if b {
|
|
if b {
|
|
//降低冒号值的权重
|
|
//降低冒号值的权重
|
|
if MhSpilt.MatchString(v1) {
|
|
if MhSpilt.MatchString(v1) {
|
|
@@ -247,6 +253,7 @@ func (table *Table) KVFilter() {
|
|
as.AddKey(k, v)
|
|
as.AddKey(k, v)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+
|
|
//处理值是数组的kv放入标准化kv中
|
|
//处理值是数组的kv放入标准化kv中
|
|
checkKey := map[int]bool{}
|
|
checkKey := map[int]bool{}
|
|
for kn, k := range as.Keys {
|
|
for kn, k := range as.Keys {
|
|
@@ -264,12 +271,17 @@ func (table *Table) KVFilter() {
|
|
for n1, _ := range vs1 {
|
|
for n1, _ := range vs1 {
|
|
smap[n1] = map[string]interface{}{}
|
|
smap[n1] = map[string]interface{}{}
|
|
}
|
|
}
|
|
|
|
+ //hadSort := false
|
|
|
|
+ tmpEntname := make([]string, len(vs1))
|
|
|
|
+ tmpPrice := make([]string, len(vs1))
|
|
for kn1, k := range as.Keys[kn:] {
|
|
for kn1, k := range as.Keys[kn:] {
|
|
v := as.Map[k]
|
|
v := as.Map[k]
|
|
if ContactType["采购单位"].MatchString(k) || ContactType["代理机构"].MatchString(k) {
|
|
if ContactType["采购单位"].MatchString(k) || ContactType["代理机构"].MatchString(k) {
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
- if vs, ok := v.([]string); ok && len(vs) == len(vs1) {
|
|
|
|
|
|
+ //目前对数组数据的key做判断,但是某些额可以是不满足情况的
|
|
|
|
+ //载明内容:[第一中标候选人 第二中标候选人] id:5d00587da5cb26b9b75e367b
|
|
|
|
+ if vs, ok := v.([]string); ok && len(vs) == len(vs1) { //数组值的个数相同
|
|
res, _, _, _, repl := CheckCommon(k, "bidorder")
|
|
res, _, _, _, repl := CheckCommon(k, "bidorder")
|
|
kv := ""
|
|
kv := ""
|
|
if !res {
|
|
if !res {
|
|
@@ -278,23 +290,72 @@ func (table *Table) KVFilter() {
|
|
kv = kt[0].Value
|
|
kv = kt[0].Value
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+ //qutil.Debug(k, res, repl, kv, "--", vs)
|
|
|
|
+ if !res && kv == "" { //key未验证出,验证数组的val值
|
|
|
|
+ checkKey[kn+kn1] = true
|
|
|
|
+ if winnertag { //如果是中标信息 在根据val数组信息解析候选人
|
|
|
|
+ for vsk, vsv := range vs {
|
|
|
|
+ if NullTdReg.MatchString(vsv) { //数据先验证val是否有排序
|
|
|
|
+ //hadSort = true
|
|
|
|
+ smap[vsk]["sortstr"] = vsv
|
|
|
|
+ smap[vsk]["sort"] = GetBidSort(vsv, vsk+1)
|
|
|
|
+ } else if findCandidate2.MatchString(vsv) && tmpEntname[vsk] == "" { //数据验证val是否是候选人
|
|
|
|
+ entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
|
|
|
|
+ if entname != "" {
|
|
|
|
+ tmpEntname[vsk] = entname
|
|
|
|
+ }
|
|
|
|
+ } else { //验证val时如果数组中的第一条数据既不满足sort或者entname 判定此数组数据错误
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
if res || kv != "" { //连续往下找几个key
|
|
if res || kv != "" { //连续往下找几个key
|
|
checkKey[kn+kn1] = true
|
|
checkKey[kn+kn1] = true
|
|
|
|
+ SORT:
|
|
if repl == "sort" {
|
|
if repl == "sort" {
|
|
|
|
+ //hadSort = true
|
|
for vsk, vsv := range vs {
|
|
for vsk, vsv := range vs {
|
|
smap[vsk]["sortstr"] = vsv
|
|
smap[vsk]["sortstr"] = vsv
|
|
smap[vsk]["sort"] = GetBidSort(vsv, vsk+1)
|
|
smap[vsk]["sort"] = GetBidSort(vsv, vsk+1)
|
|
}
|
|
}
|
|
} else if repl == "entname" || kv == "中标单位" {
|
|
} else if repl == "entname" || kv == "中标单位" {
|
|
for vsk, vsv := range vs {
|
|
for vsk, vsv := range vs {
|
|
- smap[vsk]["entname"] = winnerOrderEntity.clear("中标单位", vsv)
|
|
|
|
|
|
+ if winnerReg6.MatchString(vsv) { //k:中标候选人 v:["第一名","第二名"]
|
|
|
|
+ repl = "sort"
|
|
|
|
+ goto SORT
|
|
|
|
+ }
|
|
|
|
+ // if entname, _ := smap[vsk]["entname"].(string); entname != "" || len([]rune(vsv)) < 3 {
|
|
|
|
+ // break
|
|
|
|
+ // }
|
|
|
|
+ // entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
|
|
|
|
+ // if entname != "" {
|
|
|
|
+ // smap[vsk]["entname"] = entname
|
|
|
|
+ //
|
|
|
|
+ if tmpEntname[vsk] != "" || len([]rune(vsv)) < 4 { //排除 单位:["台","个","套"]
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
|
|
|
|
+ if entname != "" {
|
|
|
|
+ tmpEntname[vsk] = entname
|
|
|
|
+ }
|
|
}
|
|
}
|
|
} else if kv == "中标金额" {
|
|
} else if kv == "中标金额" {
|
|
for vsk, vsv := range vs {
|
|
for vsk, vsv := range vs {
|
|
- p1 := qutil.Float64All(smap[vsk]["price"])
|
|
|
|
- p2 := qutil.Float64All(vsv)
|
|
|
|
|
|
+ //过滤price 2348273.432元(万元)-->2348273.432
|
|
|
|
+ //tmp1, _ := smap[vsk]["price"].(string)
|
|
|
|
+ tmp1 := tmpPrice[vsk]
|
|
|
|
+ p1num := numberReg2.FindString(tmp1)
|
|
|
|
+ p2num := numberReg2.FindString(vsv)
|
|
|
|
+ p1 := qutil.Float64All(p1num)
|
|
|
|
+ p2 := qutil.Float64All(p2num)
|
|
if p2 > p1 {
|
|
if p2 > p1 {
|
|
- smap[vsk]["price"] = winnerOrderEntity.clear("中标金额", vsv+GetMoneyUnit(k, vsv))
|
|
|
|
|
|
+ //smap[vsk]["price"] = winnerOrderEntity.clear("中标金额", vsv+GetMoneyUnit(k, vsv))
|
|
|
|
+ price := winnerOrderEntity.clear("中标金额", vsv+GetMoneyUnit(k, vsv))
|
|
|
|
+ if pricestr, _ := price.(string); len(pricestr) < 30 && len(pricestr) > 0 {
|
|
|
|
+ tmpPrice[vsk] = pricestr
|
|
|
|
+ }
|
|
|
|
+
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -304,8 +365,22 @@ func (table *Table) KVFilter() {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
newSmap := []map[string]interface{}{}
|
|
newSmap := []map[string]interface{}{}
|
|
- for _, smap_v := range smap {
|
|
|
|
- if len(smap_v) > 0 {
|
|
|
|
|
|
+ //qutil.Debug("smap=======", smap)
|
|
|
|
+ //qutil.Debug("tmpEntname--", len(tmpEntname), tmpEntname)
|
|
|
|
+ //qutil.Debug("tmpPrice--", len(tmpPrice), tmpPrice)
|
|
|
|
+ for n, smap_v := range smap {
|
|
|
|
+ //if hadSort { //有排序,再添加entname和price
|
|
|
|
+ if len(tmpEntname) > 0 && n < len(tmpEntname) && tmpEntname[n] != "" {
|
|
|
|
+ smap_v["entname"] = tmpEntname[n]
|
|
|
|
+
|
|
|
|
+ if len(tmpPrice) > 0 && n < len(tmpPrice) && tmpPrice[n] != "" {
|
|
|
|
+ smap_v["price"] = tmpPrice[n]
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ //} else if len(tmpEntname) > 0 {
|
|
|
|
+ //fmt.Println("table winnerorder only has entname", tmpEntname)
|
|
|
|
+ //}
|
|
|
|
+ if len(smap_v) > 2 { //只有排序信息 sort和sortstr
|
|
newSmap = append(newSmap, smap_v)
|
|
newSmap = append(newSmap, smap_v)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -384,7 +459,7 @@ func (table *Table) KVFilter() {
|
|
table.StandKVWeight["中标单位"] = -25
|
|
table.StandKVWeight["中标单位"] = -25
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- } else if !table.BPackage {
|
|
|
|
|
|
+ } else if !table.BPackage { //没有table.WinnerOrder也没有分包 将td中的WinnerOrder赋值给table.WinnerOrder
|
|
if len(winnerOrder) > 1 {
|
|
if len(winnerOrder) > 1 {
|
|
table.WinnerOrder = winnerOrder
|
|
table.WinnerOrder = winnerOrder
|
|
}
|
|
}
|
|
@@ -404,6 +479,7 @@ func (table *Table) KVFilter() {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+
|
|
//1.td元素有内嵌kv,遍历放入table的Kv中2.td有子表格的处理,中标候选人排序
|
|
//1.td元素有内嵌kv,遍历放入table的Kv中2.td有子表格的处理,中标候选人排序
|
|
func (table *Table) analyTdKV() {
|
|
func (table *Table) analyTdKV() {
|
|
//遍历每一行
|
|
//遍历每一行
|
|
@@ -511,6 +587,7 @@ func (table *Table) MergerToTableresult() {
|
|
if table.TableResult.BlockTag == "" && table.Tag != "" {
|
|
if table.TableResult.BlockTag == "" && table.Tag != "" {
|
|
table.TableResult.BlockTag = table.Tag
|
|
table.TableResult.BlockTag = table.Tag
|
|
}
|
|
}
|
|
|
|
+ //中标候选人(多个table,现在默认取第一个table的信息,考虑需不需要多个table分析合并数据?)
|
|
if table.TableResult.WinnerOrder == nil || len(table.TableResult.WinnerOrder) == 0 {
|
|
if table.TableResult.WinnerOrder == nil || len(table.TableResult.WinnerOrder) == 0 {
|
|
table.TableResult.WinnerOrder = table.WinnerOrder
|
|
table.TableResult.WinnerOrder = table.WinnerOrder
|
|
}
|
|
}
|
|
@@ -528,7 +605,7 @@ func (table *Table) MergerToTableresult() {
|
|
解析表格入口
|
|
解析表格入口
|
|
返回:汇总表格对象
|
|
返回:汇总表格对象
|
|
**/
|
|
**/
|
|
-func AnalyTableV2(tabs []*goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}) (tabres *TableResult) {
|
|
|
|
|
|
+func AnalyTableV2(tabs []*goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock) (tabres *TableResult) {
|
|
defer qutil.Catch()
|
|
defer qutil.Catch()
|
|
//u.Debug(con)
|
|
//u.Debug(con)
|
|
if itype == 1 {
|
|
if itype == 1 {
|
|
@@ -536,7 +613,7 @@ func AnalyTableV2(tabs []*goquery.Selection, toptype, blockTag, con string, ityp
|
|
con = RepairCon(con)
|
|
con = RepairCon(con)
|
|
}
|
|
}
|
|
//生成tableresult对象
|
|
//生成tableresult对象
|
|
- tabres = NewTableResult(_id, toptype, blockTag, con, itype)
|
|
|
|
|
|
+ tabres = NewTableResult(_id, toptype, blockTag, con, itype, ruleBlock)
|
|
//可以有多个table
|
|
//可以有多个table
|
|
for _, table := range tabs {
|
|
for _, table := range tabs {
|
|
//隐藏表格跳过
|
|
//隐藏表格跳过
|
|
@@ -638,7 +715,7 @@ func (ts *TableResult) Analy() {
|
|
for _, table := range tabs {
|
|
for _, table := range tabs {
|
|
table.MergerToTableresult()
|
|
table.MergerToTableresult()
|
|
// for k, v := range table.TableResult.SortKV.Map {
|
|
// for k, v := range table.TableResult.SortKV.Map {
|
|
- // log.Println(k, v)
|
|
|
|
|
|
+ // qutil.Debug(k, "=====", v)
|
|
// }
|
|
// }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -658,9 +735,10 @@ func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
|
|
ts := table.AnalyTables(contactFormat)
|
|
ts := table.AnalyTables(contactFormat)
|
|
return ts
|
|
return ts
|
|
}
|
|
}
|
|
|
|
+
|
|
//遍历节点,初始化table 结构体
|
|
//遍历节点,初始化table 结构体
|
|
func (table *Table) createTabe(trs *goquery.Selection) {
|
|
func (table *Table) createTabe(trs *goquery.Selection) {
|
|
- trs.Each(func(n int, sel *goquery.Selection) {
|
|
|
|
|
|
+ trs.Each(func(n int, sel *goquery.Selection) {
|
|
//隐藏行不处理
|
|
//隐藏行不处理
|
|
if IsHide(sel) {
|
|
if IsHide(sel) {
|
|
return
|
|
return
|
|
@@ -691,7 +769,7 @@ func (table *Table) createTabe(trs *goquery.Selection) {
|
|
|
|
|
|
//对table进行整体解析处理
|
|
//对table进行整体解析处理
|
|
func (table *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
|
|
func (table *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
|
|
- ts := table.tableSubDemolitionTable()//分包,拆表
|
|
|
|
|
|
+ ts := table.tableSubDemolitionTable() //分包,拆表
|
|
for n, table := range ts {
|
|
for n, table := range ts {
|
|
//处理每个table
|
|
//处理每个table
|
|
if len(table.TRs) > 0 {
|
|
if len(table.TRs) > 0 {
|
|
@@ -733,7 +811,7 @@ func (table *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
|
|
//过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
|
|
//过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
|
|
table.KVFilter()
|
|
table.KVFilter()
|
|
}
|
|
}
|
|
- for k, v := range table.StandKV {//过滤后的标准化kv
|
|
|
|
|
|
+ for k, v := range table.StandKV { //过滤后的标准化kv
|
|
if table.TableResult.SortKV.Map[k] == nil {
|
|
if table.TableResult.SortKV.Map[k] == nil {
|
|
table.TableResult.SortKV.AddKey(k, v)
|
|
table.TableResult.SortKV.AddKey(k, v)
|
|
table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
|
|
table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
|
|
@@ -744,6 +822,7 @@ func (table *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
|
|
}
|
|
}
|
|
return ts
|
|
return ts
|
|
}
|
|
}
|
|
|
|
+
|
|
//分包,拆表
|
|
//分包,拆表
|
|
func (table *Table) tableSubDemolitionTable() []*Table {
|
|
func (table *Table) tableSubDemolitionTable() []*Table {
|
|
tm := []map[string]interface{}{}
|
|
tm := []map[string]interface{}{}
|
|
@@ -801,8 +880,9 @@ func (table *Table) tableSubDemolitionTable() []*Table {
|
|
}
|
|
}
|
|
return ts
|
|
return ts
|
|
}
|
|
}
|
|
|
|
+
|
|
//分割表格
|
|
//分割表格
|
|
-func (table *Table) bSplit( n int, ts []*Table) {
|
|
|
|
|
|
+func (table *Table) bSplit(n int, ts []*Table) {
|
|
if table.BSplit {
|
|
if table.BSplit {
|
|
if !table.BHeader && n > 0 {
|
|
if !table.BHeader && n > 0 {
|
|
for i := n - 1; i > -1; i-- {
|
|
for i := n - 1; i > -1; i-- {
|
|
@@ -818,6 +898,7 @@ func (table *Table) bSplit( n int, ts []*Table) {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+
|
|
//删除尾部空白行
|
|
//删除尾部空白行
|
|
func (table *Table) deleteTrimTr() {
|
|
func (table *Table) deleteTrimTr() {
|
|
for len(table.TRs) > 0 {
|
|
for len(table.TRs) > 0 {
|
|
@@ -838,6 +919,7 @@ func (table *Table) deleteTrimTr() {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+
|
|
//校对表格
|
|
//校对表格
|
|
func (table *Table) Adjust() {
|
|
func (table *Table) Adjust() {
|
|
//计算行列起止位置,跨行跨列处理
|
|
//计算行列起止位置,跨行跨列处理
|
|
@@ -901,11 +983,11 @@ func (table *Table) Adjust() {
|
|
|
|
|
|
//计算行/列表格的结束位置 StartRow=0 EndRow=0,table.TDNum td个数 table.RowNum 行数
|
|
//计算行/列表格的结束位置 StartRow=0 EndRow=0,table.TDNum td个数 table.RowNum 行数
|
|
func (table *Table) ComputeRowColSpan() {
|
|
func (table *Table) ComputeRowColSpan() {
|
|
- n := 0//td总个数
|
|
|
|
|
|
+ n := 0 //td总个数
|
|
mapRC := map[int]map[int]int{} //记录第几行pos,起始列对应的合并值
|
|
mapRC := map[int]map[int]int{} //记录第几行pos,起始列对应的合并值
|
|
for k, v := range table.TRs {
|
|
for k, v := range table.TRs {
|
|
- n += len(v.TDs)//每行的td总数相加
|
|
|
|
- nk := 0 //nk列的起始,k行的起始||如果有合并,起始就不是0
|
|
|
|
|
|
+ n += len(v.TDs) //每行的td总数相加
|
|
|
|
+ nk := 0 //nk列的起始,k行的起始||如果有合并,起始就不是0
|
|
ball := true
|
|
ball := true
|
|
rowspans := v.TDs[0].Rowspan //某一行第一个td的rowspan
|
|
rowspans := v.TDs[0].Rowspan //某一行第一个td的rowspan
|
|
for _, v1 := range v.TDs {
|
|
for _, v1 := range v.TDs {
|
|
@@ -966,13 +1048,14 @@ func (table *Table) ComputeRowColSpan() {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- table.TDNum = n//td总个数
|
|
|
|
- table.RowNum = len(table.TRs)//tr总行数
|
|
|
|
|
|
+ table.TDNum = n //td总个数
|
|
|
|
+ table.RowNum = len(table.TRs) //tr总行数
|
|
}
|
|
}
|
|
|
|
|
|
// fmtkey builds a composite string key of the form "<t>_<start>_<end>".
//
// t is used verbatim as the key prefix; start and end are rendered in base 10
// (negative values keep their sign). The three parts are joined with
// underscores, so a t that itself contains "_" yields a key that cannot be
// split back unambiguously.
func fmtkey(t string, start, end int) string {
	return fmt.Sprintf("%s_%d_%d", t, start, end)
}
|
|
|
|
+
|
|
//查找每个table的标签,如果有标签可按标签处理,否则根据表格去判断
|
|
//查找每个table的标签,如果有标签可按标签处理,否则根据表格去判断
|
|
func (table *Table) FindTag() {
|
|
func (table *Table) FindTag() {
|
|
//查找每个table的标签,如果有标签可按标签处理,否则根据表格去判断
|
|
//查找每个table的标签,如果有标签可按标签处理,否则根据表格去判断
|
|
@@ -1347,10 +1430,10 @@ func (table *Table) FindKV() {
|
|
}
|
|
}
|
|
**/
|
|
**/
|
|
// if td.Val == "电视" || td.Val == "电话机" || td.Val == "传真机" || td.Val == "音响" {
|
|
// if td.Val == "电视" || td.Val == "电话机" || td.Val == "传真机" || td.Val == "音响" {
|
|
- // qutil.Debug("----", "td.BH:", td.BH, "KVDirect:", td.KVDirect, "Val:", td.Val, "direct:", direct, "vdirect:", vdirect)
|
|
|
|
|
|
+ //qutil.Debug("----td.Valtype", td.Valtype, "td.BH:", td.BH, "KVDirect:", td.KVDirect, "Val:", td.Val, "direct:", direct, "vdirect:", vdirect)
|
|
// }
|
|
// }
|
|
if !td.BH && td.KVDirect < 3 {
|
|
if !td.BH && td.KVDirect < 3 {
|
|
- if !table.FindTdVal(td, direct, vdirect) {//table.FindTdVal()存储了table.SortKV
|
|
|
|
|
|
+ if !table.FindTdVal(td, direct, vdirect) { //table.FindTdVal()存储了table.SortKV
|
|
if !table.FindTdVal(td, vdirect, direct) {
|
|
if !table.FindTdVal(td, vdirect, direct) {
|
|
//都识别不到时,对第一、二中标候选人的处理
|
|
//都识别不到时,对第一、二中标候选人的处理
|
|
bo, res := GetBidOrder(td, bodirect, sort)
|
|
bo, res := GetBidOrder(td, bodirect, sort)
|
|
@@ -1576,6 +1659,11 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
|
|
// if near != nil {
|
|
// if near != nil {
|
|
// fmt.Println("near----", near.Val, td.Val)
|
|
// fmt.Println("near----", near.Val, td.Val)
|
|
// }
|
|
// }
|
|
|
|
+ // qutil.Debug(near != nil)
|
|
|
|
+ // qutil.Debug(near.BH)
|
|
|
|
+ // qutil.Debug(near.KeyDirect == vdirect, near.KeyDirect == 0)
|
|
|
|
+ // qutil.Debug(near.KVDirect == direct, near.KVDirect == 0)
|
|
|
|
+ // qutil.Debug(near.KVDirect < 3)
|
|
if near != nil && near.BH && (near.KeyDirect == vdirect || near.KeyDirect == 0) && (near.KVDirect == direct || near.KVDirect == 0) && near.KVDirect < 3 {
|
|
if near != nil && near.BH && (near.KeyDirect == vdirect || near.KeyDirect == 0) && (near.KVDirect == direct || near.KVDirect == 0) && near.KVDirect < 3 {
|
|
near.KVDirect = direct
|
|
near.KVDirect = direct
|
|
near.KeyDirect = vdirect
|
|
near.KeyDirect = vdirect
|
|
@@ -1703,7 +1791,6 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
|
|
}
|
|
}
|
|
b = true
|
|
b = true
|
|
}
|
|
}
|
|
- //qutil.Debug("map", b, table.SortKV.Map)
|
|
|
|
return
|
|
return
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1815,7 +1902,7 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else if v1, ok := v.(string); ok {
|
|
} else if v1, ok := v.(string); ok {
|
|
- v1 = replPkgConfusion(v1)//替换分包中混淆的词
|
|
|
|
|
|
+ v1 = replPkgConfusion(v1) //替换分包中混淆的词
|
|
if len([]rune(v1)) < 8 && !moneyNum.MatchString(v1) && FindVal_1.MatchString(v1) {
|
|
if len([]rune(v1)) < 8 && !moneyNum.MatchString(v1) && FindVal_1.MatchString(v1) {
|
|
key_index = in
|
|
key_index = in
|
|
index = append(index, FindVal_1.FindString(v1))
|
|
index = append(index, FindVal_1.FindString(v1))
|
|
@@ -1879,7 +1966,7 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
|
|
L:
|
|
L:
|
|
for in2, v1 := range vs {
|
|
for in2, v1 := range vs {
|
|
if len([]rune(v1)) < 20 && !moneyNum.MatchString(v1) && FindVal2_1.MatchString(v1) {
|
|
if len([]rune(v1)) < 20 && !moneyNum.MatchString(v1) && FindVal2_1.MatchString(v1) {
|
|
- for _, serial := range regSerialTitles_2 {
|
|
|
|
|
|
+ for _, serial := range tn.TableResult.RuleBlock.TitleRegs {
|
|
if serial.MatchString(v1) {
|
|
if serial.MatchString(v1) {
|
|
break L
|
|
break L
|
|
}
|
|
}
|
|
@@ -2035,8 +2122,8 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
|
|
//if !(len(k1tags) > 0 && k1tags[0].Value == "采购单位") {
|
|
//if !(len(k1tags) > 0 && k1tags[0].Value == "采购单位") {
|
|
// tn.SortKV.RemoveKey(k1)
|
|
// tn.SortKV.RemoveKey(k1)
|
|
//}
|
|
//}
|
|
- for _,vcgdw:=range k1tags{
|
|
|
|
- if vcgdw.Value =="采购单位"{
|
|
|
|
|
|
+ for _, vcgdw := range k1tags {
|
|
|
|
+ if vcgdw.Value == "采购单位" {
|
|
tn.SortKV.RemoveKey(k1)
|
|
tn.SortKV.RemoveKey(k1)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -2304,7 +2391,7 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) {
|
|
LS:
|
|
LS:
|
|
for _, tr := range tn.TRs {
|
|
for _, tr := range tn.TRs {
|
|
for td_index, td := range tr.TDs {
|
|
for td_index, td := range tr.TDs {
|
|
- thisTdKvs := colonkvEntity.GetKvs(td.Text, tn.Desc, 2)//获取有序的kv
|
|
|
|
|
|
+ thisTdKvs := colonkvEntity.GetKvs(td.Text, tn.Desc, 2) //获取有序的kv
|
|
if len(thisTdKvs) == 0 {
|
|
if len(thisTdKvs) == 0 {
|
|
tdValue := regReplAllSpace.ReplaceAllString(td.Text, "")
|
|
tdValue := regReplAllSpace.ReplaceAllString(td.Text, "")
|
|
if tdValue != "" && len([]rune(tdValue)) < 10 {
|
|
if tdValue != "" && len([]rune(tdValue)) < 10 {
|
|
@@ -2413,7 +2500,7 @@ L:
|
|
jumpNextTd = false
|
|
jumpNextTd = false
|
|
}
|
|
}
|
|
///////////////////////////////////////
|
|
///////////////////////////////////////
|
|
- thisTdKvs := kvAfterDivideBlock(td.Text, 3)//分块之后的kv
|
|
|
|
|
|
+ thisTdKvs := kvAfterDivideBlock(td.Text, 3, tn.TableResult.RuleBlock) //分块之后的kv
|
|
if len(thisTdKvs) == 0 {
|
|
if len(thisTdKvs) == 0 {
|
|
thisTdKvs = colonkvEntity.GetKvs(td.Text, tn.Desc, 2)
|
|
thisTdKvs = colonkvEntity.GetKvs(td.Text, tn.Desc, 2)
|
|
}
|
|
}
|
|
@@ -2545,7 +2632,7 @@ L:
|
|
}
|
|
}
|
|
if len(indexMap) == 0 {
|
|
if len(indexMap) == 0 {
|
|
prevLine := FilterSerial.ReplaceAllString(td_kv.PrevLine, "")
|
|
prevLine := FilterSerial.ReplaceAllString(td_kv.PrevLine, "")
|
|
- for k, v := range ContactType {//采购单位,代理机构正则
|
|
|
|
|
|
+ for k, v := range ContactType { //采购单位,代理机构正则
|
|
if u.IsArrayHasValue(prevLine, v.FindAllString(prevLine, -1)) {
|
|
if u.IsArrayHasValue(prevLine, v.FindAllString(prevLine, -1)) {
|
|
indexMap[thidTdIndex] = k
|
|
indexMap[thidTdIndex] = k
|
|
thisTrHasMatch = true
|
|
thisTrHasMatch = true
|
|
@@ -2695,9 +2782,7 @@ func (table *Table) analyBrand() {
|
|
lineMap := make(map[string]*SortMap)
|
|
lineMap := make(map[string]*SortMap)
|
|
brandRule := u.BrandRules
|
|
brandRule := u.BrandRules
|
|
//初始化lineMapArr,lineMap;
|
|
//初始化lineMapArr,lineMap;
|
|
- initLineMapLineMapArr(table, lineMapArr, lineMap)
|
|
|
|
- //添加table.BrandData
|
|
|
|
- //处理数组数据后,匹配必须title和替换要保存的title
|
|
|
|
|
|
+ initLineMapLineMapArr(table, lineMapArr, lineMap) //处理数组数据后,匹配必须title和替换要保存的title
|
|
//qutil.Debug("lineMapArr----", len(lineMapArr))
|
|
//qutil.Debug("lineMapArr----", len(lineMapArr))
|
|
if len(lineMapArr) > 0 {
|
|
if len(lineMapArr) > 0 {
|
|
for _, aMap := range lineMapArr {
|
|
for _, aMap := range lineMapArr {
|
|
@@ -2894,7 +2979,6 @@ func (table *Table) analyBrand() {
|
|
}
|
|
}
|
|
//初始化lineMapArr,lineMap
|
|
//初始化lineMapArr,lineMap
|
|
func initLineMapLineMapArr(table *Table, lineMapArr map[string]*SortMap, lineMap map[string]*SortMap) {
|
|
func initLineMapLineMapArr(table *Table, lineMapArr map[string]*SortMap, lineMap map[string]*SortMap) {
|
|
- //将val为数组和string的分开
|
|
|
|
for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
|
|
for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
|
|
val := table.SortKV.Map[key]
|
|
val := table.SortKV.Map[key]
|
|
key = regReplAllSpace.ReplaceAllString(key, "")
|
|
key = regReplAllSpace.ReplaceAllString(key, "")
|
|
@@ -2965,7 +3049,7 @@ func initLineMapLineMapArr(table *Table, lineMapArr map[string]*SortMap, lineMap
|
|
} else {
|
|
} else {
|
|
// "_id" : ObjectId("5c2c3802a5cb26b9b78646c4")5c2b0551a5cb26b9b7cb05db否5c2a42e6a5cb26b9b763ba5a采购人:一、采购人5c2b06f5a5cb26b9b7cc4409
|
|
// "_id" : ObjectId("5c2c3802a5cb26b9b78646c4")5c2b0551a5cb26b9b7cb05db否5c2a42e6a5cb26b9b763ba5a采购人:一、采购人5c2b06f5a5cb26b9b7cc4409
|
|
//成交供应商排名 [map[entname:昆明合优科技有限公司 sortstr:第一中标候选人 sort:1] map[sort:2 entname:昆明厚起科技有限公司 sortstr:第二中标候选人] map[entname:云南远安科技发展有限公司 sortstr:第三中标候选人 sort:3]]
|
|
//成交供应商排名 [map[entname:昆明合优科技有限公司 sortstr:第一中标候选人 sort:1] map[sort:2 entname:昆明厚起科技有限公司 sortstr:第二中标候选人] map[entname:云南远安科技发展有限公司 sortstr:第三中标候选人 sort:3]]
|
|
- //fmt.Println("err data:", key, val)
|
|
|
|
|
|
+ //qutil.Debug("err data:", key, val)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -3273,6 +3357,6 @@ func dealNumber(val ...string) ([]string, []string) {
|
|
return result, unitnameArr
|
|
return result, unitnameArr
|
|
}
|
|
}
|
|
|
|
|
|
-func (tn *Table) analyProNameAndItemNumber(){
|
|
|
|
|
|
+func (tn *Table) analyProNameAndItemNumber() {
|
|
|
|
|
|
-}
|
|
|
|
|
|
+}
|