|
@@ -159,9 +159,9 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (kvTags map[s
|
|
}
|
|
}
|
|
if winnerOrderAndBidResult.MatchString(tabletag) && t1.Value == "采购单位联系人" { //处理table中项目负责人
|
|
if winnerOrderAndBidResult.MatchString(tabletag) && t1.Value == "采购单位联系人" { //处理table中项目负责人
|
|
kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true})
|
|
kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true})
|
|
- } else if regexp.MustCompile("(中标候选人|名单及其排序|排序)").MatchString(tabletag) && t1.Value == "采购单位"{
|
|
|
|
- kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight-100})
|
|
|
|
- }else{
|
|
|
|
|
|
+ } else if regexp.MustCompile("(中标候选人|名单及其排序|排序)").MatchString(tabletag) && t1.Value == "采购单位" {
|
|
|
|
+ kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight - 100})
|
|
|
|
+ } else {
|
|
kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight})
|
|
kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight})
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -224,6 +224,9 @@ func (table *Table) KVFilter() {
|
|
v := table.SortKV.Map[k]
|
|
v := table.SortKV.Map[k]
|
|
if _, ok := v.(string); ok { //table.SortKV.Value为字符串,匹配抽取关键词table.SortKV.Key,匹配到添加k,v到table.StandKV,table.StandKVWeight
|
|
if _, ok := v.(string); ok { //table.SortKV.Value为字符串,匹配抽取关键词table.SortKV.Key,匹配到添加k,v到table.StandKV,table.StandKVWeight
|
|
k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
|
|
k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
|
|
|
|
+ if k == "2、建设规模" {
|
|
|
|
+ k = "预算"
|
|
|
|
+ }
|
|
kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
|
|
kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
|
|
//qutil.Debug(k, v, k1, w1, v1, tag, b)
|
|
//qutil.Debug(k, v, k1, w1, v1, tag, b)
|
|
if tag != "" && table.Tag == "" {
|
|
if tag != "" && table.Tag == "" {
|
|
@@ -665,7 +668,9 @@ func (ts *TableResult) Analy() {
|
|
bp := &u.BlockPackage{}
|
|
bp := &u.BlockPackage{}
|
|
bp.Index = v1
|
|
bp.Index = v1
|
|
bp.Origin = matchres[0][0]
|
|
bp.Origin = matchres[0][0]
|
|
- bp.TableKV = u.NewJobKv()
|
|
|
|
|
|
+ if bp.TableKV == nil {
|
|
|
|
+ bp.TableKV = u.NewJobKv()
|
|
|
|
+ }
|
|
for _, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
|
|
for _, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
|
|
if len(table.StandKV[k]) > 0 {
|
|
if len(table.StandKV[k]) > 0 {
|
|
bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value})
|
|
bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value})
|
|
@@ -675,6 +680,9 @@ func (ts *TableResult) Analy() {
|
|
if table.BlockPackage.Map[v1] == nil {
|
|
if table.BlockPackage.Map[v1] == nil {
|
|
table.BPackage = true
|
|
table.BPackage = true
|
|
table.BlockPackage.AddKey(v1, bp)
|
|
table.BlockPackage.AddKey(v1, bp)
|
|
|
|
+ }else {
|
|
|
|
+ table.BlockPackage.RemoveKey(v1)
|
|
|
|
+ table.BlockPackage.AddKey(v1, bp)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -797,13 +805,6 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
|
|
//log.Println(table.TableResult.Id, table.Html)
|
|
//log.Println(table.TableResult.Id, table.Html)
|
|
//分割表格
|
|
//分割表格
|
|
table.bSplit(n, ts)
|
|
table.bSplit(n, ts)
|
|
- //对没有表头表格的处理
|
|
|
|
- if table.Tag != "" {
|
|
|
|
- _, _, b := CheckMultiPackage(table.Tag, "")
|
|
|
|
- if b {
|
|
|
|
- table.StandKV["项目名称"] = append(table.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: table.Tag, Weight: -100})
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
table.TdContactFormat(contactFormat) //contactFormat,处理采购单位,代理机构
|
|
table.TdContactFormat(contactFormat) //contactFormat,处理采购单位,代理机构
|
|
//开始查找kv,核心模块,table.SortKV
|
|
//开始查找kv,核心模块,table.SortKV
|
|
table.FindKV()
|
|
table.FindKV()
|
|
@@ -811,13 +812,40 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
|
|
if u.IsBrandGoods {
|
|
if u.IsBrandGoods {
|
|
table.analyBrand()
|
|
table.analyBrand()
|
|
}
|
|
}
|
|
- //判断是否是多包,并处理分包的//遍历td分块
|
|
|
|
- table.CheckMultiPackageByTable()
|
|
|
|
res, _, _, _, _ := CheckCommon(table.Tag, "abandontable")
|
|
res, _, _, _, _ := CheckCommon(table.Tag, "abandontable")
|
|
if !res {
|
|
if !res {
|
|
//过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
|
|
//过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
|
|
table.KVFilter()
|
|
table.KVFilter()
|
|
}
|
|
}
|
|
|
|
+ //对没有表头表格的处理
|
|
|
|
+ if table.Tag != "" {
|
|
|
|
+ co, m, b := CheckMultiPackage(table.Tag, "")
|
|
|
|
+ if b {
|
|
|
|
+ table.BPackage = b
|
|
|
|
+ if len(table.BlockPackage.Map) == 0 {
|
|
|
|
+ for _,av := range m{
|
|
|
|
+ kv := u.NewJobKv()
|
|
|
|
+ kv.KvTags= table.StandKV
|
|
|
|
+ bd:=u.PackageNumberConvert(av[0])
|
|
|
|
+ blockPackage := &u.BlockPackage{
|
|
|
|
+ Origin:av[0],
|
|
|
|
+ Name:av[0],
|
|
|
|
+ Text:co,
|
|
|
|
+ TableKV:kv,
|
|
|
|
+ Index:bd,
|
|
|
|
+ }
|
|
|
|
+ if bd !=""{
|
|
|
|
+ table.BlockPackage.AddKey(bd, blockPackage)
|
|
|
|
+ }else {
|
|
|
|
+ table.BlockPackage.AddKey(av[0], blockPackage)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ table.StandKV["项目名称"] = append(table.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: table.Tag, Weight: -300})
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ //判断是否是多包,并处理分包的//遍历td分块
|
|
|
|
+ table.CheckMultiPackageByTable()
|
|
//MergeKvTags(table.TableResult.KvTags, table.StandKV)
|
|
//MergeKvTags(table.TableResult.KvTags, table.StandKV)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -1062,27 +1090,6 @@ func (table *Table) FindTag() {
|
|
if table.Tag != "" {
|
|
if table.Tag != "" {
|
|
return
|
|
return
|
|
}
|
|
}
|
|
- t1, _ := goquery.OuterHtml(table.Goquery)
|
|
|
|
- //t1, _ := table.Goquery.OuterHtml()
|
|
|
|
- html := table.Html
|
|
|
|
- pos := strings.Index(html, t1)
|
|
|
|
- if pos <= 0 {
|
|
|
|
- doc, _ := goquery.NewDocumentFromReader(strings.NewReader(table.Html))
|
|
|
|
- html, _ = doc.Html()
|
|
|
|
- pos = strings.Index(html, t1)
|
|
|
|
- }
|
|
|
|
- //u.Debug("--------", t1, "====\n\n\n\n=====", html)
|
|
|
|
- if pos > 0 {
|
|
|
|
- tcon := html[:pos]
|
|
|
|
- tcon = cut.ClearHtml(tcon)
|
|
|
|
- tcon = ClearTagReg.ReplaceAllString(tcon, "")
|
|
|
|
- //u.Debug(pos, "-----------", tcon)
|
|
|
|
- strs := ttagreg.FindStringSubmatch(tcon)
|
|
|
|
- if len(strs) > 0 {
|
|
|
|
- table.Tag = strs[0]
|
|
|
|
- //u.Debug(table.Tag)
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
if table.Tag == "" {
|
|
if table.Tag == "" {
|
|
table.Tag = table.TableResult.BlockTag
|
|
table.Tag = table.TableResult.BlockTag
|
|
}
|
|
}
|
|
@@ -1693,7 +1700,7 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
|
|
near.KVDirect = direct
|
|
near.KVDirect = direct
|
|
near.KeyDirect = vdirect
|
|
near.KeyDirect = vdirect
|
|
td.KVDirect = direct
|
|
td.KVDirect = direct
|
|
- key := near.Val
|
|
|
|
|
|
+ key := repSpace.ReplaceAllString(near.Val, "")
|
|
if near.Val == "" {
|
|
if near.Val == "" {
|
|
key = fmtkey("k", near.TR.RowPos, near.ColPos)
|
|
key = fmtkey("k", near.TR.RowPos, near.ColPos)
|
|
}
|
|
}
|
|
@@ -1953,10 +1960,12 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
|
|
//根据数组index分包长度添加table.BlockPackage子包数组
|
|
//根据数组index分包长度添加table.BlockPackage子包数组
|
|
for nk, v := range index {
|
|
for nk, v := range index {
|
|
if tn.BlockPackage.Map[v] == nil {
|
|
if tn.BlockPackage.Map[v] == nil {
|
|
|
|
+ kv := u.NewJobKv()
|
|
|
|
+ kv.KvTags = tn.StandKV
|
|
bp := &u.BlockPackage{}
|
|
bp := &u.BlockPackage{}
|
|
bp.Index = v //序号 (转换后编号,只有数字或字母)
|
|
bp.Index = v //序号 (转换后编号,只有数字或字母)
|
|
bp.Origin = oldIndex[nk] //包的原始值
|
|
bp.Origin = oldIndex[nk] //包的原始值
|
|
- bp.TableKV = u.NewJobKv() //table kv (分出的对应的KV值)
|
|
|
|
|
|
+ bp.TableKV = kv //table kv (分出的对应的KV值)
|
|
tn.BlockPackage.AddKey(v, bp) //table子包数组
|
|
tn.BlockPackage.AddKey(v, bp) //table子包数组
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -1971,8 +1980,8 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
|
|
//查找分包中的中标人排序
|
|
//查找分包中的中标人排序
|
|
if tn.BlockPackage != nil && tn.BlockPackage.Keys != nil && len(tn.BlockPackage.Keys) > 0 {
|
|
if tn.BlockPackage != nil && tn.BlockPackage.Keys != nil && len(tn.BlockPackage.Keys) > 0 {
|
|
for _, v := range tn.BlockPackage.Keys {
|
|
for _, v := range tn.BlockPackage.Keys {
|
|
- vv := tn.BlockPackage.Map[v].(*u.BlockPackage)
|
|
|
|
- if vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0 {
|
|
|
|
|
|
+ vv, ok := tn.BlockPackage.Map[v].(*u.BlockPackage)
|
|
|
|
+ if ok && (vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0) {
|
|
vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2)
|
|
vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -2084,7 +2093,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int)
|
|
func (tn *Table) isGoonNext() {
|
|
func (tn *Table) isGoonNext() {
|
|
blockPackage := map[string]*u.BlockPackage{}
|
|
blockPackage := map[string]*u.BlockPackage{}
|
|
for _, k := range tn.SortKV.Keys {
|
|
for _, k := range tn.SortKV.Keys {
|
|
- if excludeKey.MatchString(k) {
|
|
|
|
|
|
+ if excludeKey.MatchString(k) || strings.Contains(k, "批复") {
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
str := "" //拼装为冒号kv
|
|
str := "" //拼装为冒号kv
|
|
@@ -2233,7 +2242,7 @@ func foundPacBySortKV(tn *Table, val int, index []string, index_pos []int, keyEx
|
|
func initCheckMultiPackageByTable(tn *Table, key_index int, index []string, index_pos []int, val int, pac int, hasPkgTd map[string]bool) (rkey_index int, rindex []string, rindex_pos []int, rval int, rpac int, rhasPkgTd map[string]bool) {
|
|
func initCheckMultiPackageByTable(tn *Table, key_index int, index []string, index_pos []int, val int, pac int, hasPkgTd map[string]bool) (rkey_index int, rindex []string, rindex_pos []int, rval int, rpac int, rhasPkgTd map[string]bool) {
|
|
for in, k := range tn.SortKV.Keys {
|
|
for in, k := range tn.SortKV.Keys {
|
|
//涉及包号|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)就跳过
|
|
//涉及包号|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)就跳过
|
|
- if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) {
|
|
|
|
|
|
+ if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) || strings.Contains(k, "批复") {
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
v := tn.SortKV.Map[k]
|
|
v := tn.SortKV.Map[k]
|
|
@@ -3168,7 +3177,7 @@ func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMa
|
|
for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
|
|
for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
|
|
val := table.SortKV.Map[key]
|
|
val := table.SortKV.Map[key]
|
|
key = regReplAllSpace.ReplaceAllString(key, "")
|
|
key = regReplAllSpace.ReplaceAllString(key, "")
|
|
- key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
|
|
|
|
|
|
+ key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
|
|
if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
|
|
if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
|
|
/*
|
|
/*
|
|
{
|
|
{
|