Pārlūkot izejas kodu

table注释优化

fengweiqiang 6 gadi atpakaļ
vecāks
revīzija
15e8c4739f
2 mainītis faili ar 120 papildinājumiem un 94 dzēšanām
  1. 116 92
      src/jy/pretreated/analytable.go
  2. 4 2
      src/jy/pretreated/tablev2.go

+ 116 - 92
src/jy/pretreated/analytable.go

@@ -207,21 +207,20 @@ func (table *Table) KVFilter() {
 	if !winnertag {
 		winnertag = iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.TableResult.BlockTag) //块标签
 	}
-	table.analyTdKV() //1.td元素有内嵌kv,遍历放入table的Kv中2.td有子表格的处理,中标候选人排序
+	table.analyTdKV() //1.遍历每行每列td的sortkv添加到table.SorkVK中;2.td有子表格的处理
 	as := NewSortMap()
-	//表格描述处理,对成交结果的处理
+
+	//遍历table.sortkv,进行过滤处理,并放入标准化KV中,如果值是数组跳到下一步处理
 	for _, k := range table.SortKV.Keys {
+		//表格描述处理,对成交结果的处理
 		if regexp.MustCompile("(成交|中标|候选|排名|名次|供应商排序)").MatchString(k) {
 			table.Desc += "成交结果,"
 		}
-	}
-	//遍历table.sortkv,进行过滤处理,并放入标准化KV中,如果值是数组跳到下一步处理
-	for _, k := range table.SortKV.Keys {
 		if regexp.MustCompile("^单价").MatchString(k) {
 			continue
 		}
 		v := table.SortKV.Map[k]
-		if _, ok := v.(string); ok {
+		if _, ok := v.(string); ok { //table.SortKV.Value为字符串,匹配抽取关键词table.SortKV.Key,匹配到添加k,v到table.StandKV,table.StandKVWeight
 			k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
 			k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
 			//qutil.Debug(k, v, k1, w1, v1, tag, b)
@@ -235,7 +234,7 @@ func (table *Table) KVFilter() {
 				if tag != "" && table.Tag == "" {
 					table.Tag = tag
 				}
-				for pos, k2 := range k1 {
+				for pos, k2 := range k1 { //根据关键词,过滤table.SortKV到table.StandKV和table.StandKVWeight
 					if table.StandKV[k2] == "" || w1[pos] > table.StandKVWeight[k2] {
 						table.StandKV[k2] = v1 //本节点
 						table.StandKVWeight[k2] = w1[pos]
@@ -254,9 +253,84 @@ func (table *Table) KVFilter() {
 		}
 	}
 
-	//处理值是数组的kv放入标准化kv中
+	//处理值是数组的kv放入标准化kv中//处理table.SortKV.value为数组的情况
+	table.sortKVArr(as, winnertag)
+	//
+	if filterTableWror.MatchString(table.Tag) {
+		table.WinnerOrder = nil
+	}
+	//
+	if len(table.WinnerOrder) > 0 || !table.BPackage {
+		winnerOrder := []map[string]interface{}{}
+		maxSort := 0
+		//调整顺序
+		for i := 0; i < 2; i++ {
+			for _, v := range table.WinnerOrder {
+				sortstr, _ := v["sortstr"].(string)
+				if (i == 0 && sortstr == "") || (i == 1 && sortstr != "") {
+					continue
+				}
+				sort, _ := v["sort"].(int)
+				if i == 0 {
+					if maxSort == 0 || sort > maxSort {
+						maxSort = sort
+					}
+				} else {
+					maxSort++
+					v["sort"] = maxSort
+				}
+				winnerOrder = append(winnerOrder, v)
+			}
+			if len(winnerOrder) == len(table.WinnerOrder) {
+				break
+			}
+		}
+		table.WinnerOrder = winnerOrder
+		winnerOrder = []map[string]interface{}{}
+	L: //遍历每个td,查询中标人
+		for _, tr := range table.TRs {
+			for _, td := range tr.TDs {
+				winnerOrder = winnerOrderEntity.Find(td.Val, true, 3)
+				if len(winnerOrder) > 0 {
+					break L
+				}
+			}
+		}
+		if len(table.WinnerOrder) > 0 {
+			//中标候选人合并
+			winnerOrderEntity.Merge(table.WinnerOrder, winnerOrder)
+			if table.StandKV["中标单位"] == "" {
+				ent := table.WinnerOrder[0]["entname"]
+				if ent != nil {
+					table.StandKV["中标单位"], _ = ent.(string)
+					table.StandKVWeight["中标单位"] = -25
+				}
+			}
+		} else if !table.BPackage { //没有table.WinnerOrder也没有分包 将td中的WinnerOrder赋值给table.WinnerOrder
+			if len(winnerOrder) > 1 {
+				table.WinnerOrder = winnerOrder
+			}
+		}
+	}
+	//对中标候选人进行排序
+	winnerOrderEntity.Order(table.WinnerOrder)
+	//该表格有一个分包,并且有中标候选人排序的情况下,把中标候选人放到包里面
+	if table.BlockPackage != nil && table.BlockPackage.Keys != nil && len(table.BlockPackage.Keys) == 1 {
+		if table.BlockPackage.Map != nil {
+			onePkgKey := table.BlockPackage.Keys[0]
+			onePkg, _ := table.BlockPackage.Map[onePkgKey].(*u.BlockPackage)
+			if onePkg != nil && onePkg.WinnerOrder != nil && len(onePkg.WinnerOrder) == 0 {
+				onePkg.WinnerOrder = table.WinnerOrder
+				table.BlockPackage.Map[onePkgKey] = onePkg
+			}
+		}
+	}
+}
+
+//处理table.SortKV.value为数组的情况
+func (table *Table) sortKVArr(as *SortMap, winnertag bool) {
 	checkKey := map[int]bool{}
-	for kn, k := range as.Keys {
+	for kn, k := range as.Keys { //遍历table.SortKV.value为数组的key
 		v := as.Map[k]
 		if vm, ok := v.([]map[string]interface{}); ok && k == NullTxtBid {
 			if table.WinnerOrder == nil {
@@ -408,79 +482,9 @@ func (table *Table) KVFilter() {
 			}
 		}
 	}
-	//
-	if filterTableWror.MatchString(table.Tag) {
-		table.WinnerOrder = nil
-	}
-	//
-	if len(table.WinnerOrder) > 0 || !table.BPackage {
-		winnerOrder := []map[string]interface{}{}
-		maxSort := 0
-		//调整顺序
-		for i := 0; i < 2; i++ {
-			for _, v := range table.WinnerOrder {
-				sortstr, _ := v["sortstr"].(string)
-				if (i == 0 && sortstr == "") || (i == 1 && sortstr != "") {
-					continue
-				}
-				sort, _ := v["sort"].(int)
-				if i == 0 {
-					if maxSort == 0 || sort > maxSort {
-						maxSort = sort
-					}
-				} else {
-					maxSort++
-					v["sort"] = maxSort
-				}
-				winnerOrder = append(winnerOrder, v)
-			}
-			if len(winnerOrder) == len(table.WinnerOrder) {
-				break
-			}
-		}
-		table.WinnerOrder = winnerOrder
-		winnerOrder = []map[string]interface{}{}
-	L:
-		for _, tr := range table.TRs {
-			for _, td := range tr.TDs {
-				winnerOrder = winnerOrderEntity.Find(td.Val, true, 3)
-				if len(winnerOrder) > 0 {
-					break L
-				}
-			}
-		}
-		if len(table.WinnerOrder) > 0 {
-			//中标候选人合并
-			winnerOrderEntity.Merge(table.WinnerOrder, winnerOrder)
-			if table.StandKV["中标单位"] == "" {
-				ent := table.WinnerOrder[0]["entname"]
-				if ent != nil {
-					table.StandKV["中标单位"], _ = ent.(string)
-					table.StandKVWeight["中标单位"] = -25
-				}
-			}
-		} else if !table.BPackage { //没有table.WinnerOrder也没有分包 将td中的WinnerOrder赋值给table.WinnerOrder
-			if len(winnerOrder) > 1 {
-				table.WinnerOrder = winnerOrder
-			}
-		}
-	}
-	//对中标候选人进行排序
-	winnerOrderEntity.Order(table.WinnerOrder)
-	//该表格有一个分包,并且有中标候选人排序的情况下,把中标候选人放到包里面
-	if table.BlockPackage != nil && table.BlockPackage.Keys != nil && len(table.BlockPackage.Keys) == 1 {
-		if table.BlockPackage.Map != nil {
-			onePkgKey := table.BlockPackage.Keys[0]
-			onePkg, _ := table.BlockPackage.Map[onePkgKey].(*u.BlockPackage)
-			if onePkg != nil && onePkg.WinnerOrder != nil && len(onePkg.WinnerOrder) == 0 {
-				onePkg.WinnerOrder = table.WinnerOrder
-				table.BlockPackage.Map[onePkgKey] = onePkg
-			}
-		}
-	}
 }
 
-//1.td元素有内嵌kv,遍历放入table的Kv中2.td有子表格的处理,中标候选人排序
+//1.遍历每行每列td的sortkv添加到table.SortKV中;2.td有子表格的处理
 func (table *Table) analyTdKV() {
 	//遍历每一行
 	for _, tr := range table.TRs {
@@ -502,7 +506,7 @@ func (table *Table) analyTdKV() {
 						if td.HeadTd != nil && len([]rune(k3)) < 4 {
 							k3 = td.HeadTd.Val + k3
 						}
-						if table.SortKV.Map[k3] == nil {
+						if table.SortKV.Map[k3] == nil && _val != nil && _val != "" {
 							//u.Debug(k3, _val)
 							//if !thisFlag || (thisFlag && table.SortKV.Map[k3] == nil) {
 							table.SortKV.AddKey(k3, _val)
@@ -1436,6 +1440,9 @@ func (table *Table) FindKV() {
 							}
 							if len(td.SortKV.Map) > 0 {
 								for tdk, tdv := range td.SortKV.Map {
+									if tdv == nil || tdv == "" {//value为空或者null不再添加到table.SortKV
+										continue
+									}
 									table.SortKV.AddKey(tdk, tdv)
 								}
 							}
@@ -1759,11 +1766,22 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 				if bvalfind {
 					vals[varrpos] = td.Val // += "__" + td.Val
 				} else {
-					vals = append(vals, td.Val)
-					val = vals
+					//添加时候去除空值和nil
+					newVals := []string{}
+					for _, isval := range vals {
+						if isval == "" {
+							continue
+						}
+						newVals = append(newVals, isval)
+					}
+					//vals = append(vals, td.Val)
+					if td.Val != "" {
+						newVals = append(newVals, td.Val)
+					}
+					val = newVals
 					varrpos = len(vals) - 1
 				}
-			} else if vals, ok := val.(string); ok {
+			} else if vals, ok := val.(string); ok && vals != "" && td.Val != "" {
 				if bvalfind {
 					val = td.Val //vals + "__" + td.Val
 				} else {
@@ -1786,6 +1804,9 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 			tkey := fmtkey("k", near.TR.RowPos, near.ColPos)
 			table.SortKV.ReplaceKey(key, val, tkey)
 		} else {
+			if val == nil || val == "" ||key=="采购项目预算金额"{
+				return
+			}
 			table.SortKV.AddKey(key, val)
 			//if table.SortKV.Map[key] != nil {
 			pos := table.SortKV.Index[key]
@@ -1940,7 +1961,7 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
 					tn.BlockPackage.AddKey(v, bp) //table子包数组
 				}
 			}
-			isGoonNext = tn.manyPackageProcessByIndex(index,standIndex_pos)
+			isGoonNext = tn.manyPackageProcessByIndex(index, standIndex_pos) //多包处理,处理不同情况下的分包
 		}
 	} else {
 		isGoonNext = true
@@ -1959,10 +1980,11 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
 	}
 	return
 }
-//多包处理
-func (tn *Table)manyPackageProcessByIndex(index []string ,standIndex_pos []int,)(isGoonNext bool){
+
+//多包处理,处理不同情况下的分包
+func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int, ) (isGoonNext bool) {
 	if len(index) == 1 { //是一个的情况
-		if len(tn.SortKV.Keys) < 10 && tn.ColNum < 10 && tn.RowNum < 4 {//table带排序的KV值小于10并且小于10列和小于4行
+		if len(tn.SortKV.Keys) < 10 && tn.ColNum < 10 && tn.RowNum < 4 { //table带排序的KV值小于10并且小于10列和小于4行
 			beq := true
 			for _, v2 := range tn.SortKV.Map {
 				if _, ok := v2.(string); !ok {
@@ -1999,7 +2021,7 @@ func (tn *Table)manyPackageProcessByIndex(index []string ,standIndex_pos []int,)
 			v1 = v1_array
 		}
 		if val, bvs := v1.([]string); bvs {
-			if len(val) <= len(index) {//table.SortKV.Map.value数组小于等于分包index
+			if len(val) <= len(index) { //table.SortKV.Map.value数组小于等于分包index
 				for k, v := range val {
 					tn.assemblePackage(k1, v, index[k]) //组装解析到的分包
 				}
@@ -2257,15 +2279,17 @@ func (tn *Table) assemblePackage(k1, v1, key string) {
 		bp.TableKV = u.NewJobKv()
 	}
 	if v1 != "" {
-		k2, w1, v2, _, bf := CommonDataAnaly(k1, "中标情况", "", v1)//匹配抽取关键词
+		k2, w1, v2, _, bf := CommonDataAnaly(k1, "中标情况", "", v1) //匹配抽取关键词
 		if bf {
 			for pos, k3 := range k2 {
 				if bp.TableKV.Kv != nil && bp.TableKV.KvTag[k3] != nil && (bp.TableKV.Kv[k3] == "" || w1[pos] > bp.TableKV.KvTag[k3].Weight) {
 					bp.TableKV.Kv[k3] = v2
 					bp.TableKV.KvTag[k3] = &u.Tag{Value: v2, Weight: w1[pos]}
-				}else {
+				} else {
 					bp.TableKV.Kv[k1] = qutil.ObjToString(v1)
-					tn.SortKV.AddKey(k3,v2)
+					if tn.SortKV.Map[k3] == nil {
+						tn.SortKV.AddKey(k3, v2) //添加匹配到抽取关键词的key,value
+					}
 				}
 			}
 		} else {

+ 4 - 2
src/jy/pretreated/tablev2.go

@@ -331,7 +331,9 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
 		}
 		_, resm := colonkvEntity.entrance(td.Val, kvTitle, nil, 3) //td冒号kv
 		for k, v := range resm {
-			td.SortKV.AddKey(k, v) //存放kv值
+			if k != "" && v != "" {
+				td.SortKV.AddKey(k, v) //存放kv值
+			}
 		}
 		//u.Debug(td.SortKV.Keys, "-------2--------------------------------")
 		//		td.SortKV = FindKv(text, "") //GetKvFromtxt(text, "")
@@ -345,7 +347,7 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
 		} else if !bsontable {
 			txt := repSpace.ReplaceAllString(td.Val, "")
 			btw, must, _, _, repl := CheckHeader(txt)
-			if lenval > 15 {
+			if lenval > 15 && !strings.Contains(txt, "采购代理机构名称、地址和联系方式") {
 				btw = false
 			}
 			if strings.Contains(td.Val, "个项目") {