Jelajahi Sumber

抽取优化

fengweiqiang 6 tahun lalu
induk
melakukan
134ff276ce

+ 4 - 0
src/jy/extract/score.go

@@ -114,6 +114,8 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 		for tmpsindex, tmpsvalue := range tmps {
 			//没有抽取到值,不打分
 			if string_value := fmt.Sprint(tmpsvalue.Value); string_value == "" || string_value == "0" || string_value == "<nil>" {
+				tmps[tmpsindex].Score = -10
+				tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: `value结果为空直接-10分`, Code: field, Value: tmpsvalue.Value, Score: -10})
 				continue
 			}
 			lockscore.Lock()
@@ -198,6 +200,8 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 				//1.长度打分
 				valueLen := utf8.RuneCountInString(fmt.Sprint(tmpsvalue.Value))
 				if valueLen < 1 {
+					tmps[tmpsindex].Score = -10
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: `valueLen < 1 && field != "projectscope"直接-10分`, Code: field, Value: tmpsvalue.Value, Score: -10})
 					continue
 				}
 				if valueLen > 100 && field != "projectscope" {

+ 4 - 3
src/jy/extract/score_jsondata.go

@@ -74,11 +74,12 @@ func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.Ext
 		lockscore.Lock()
 		scoreRule := SoreConfig[k]
 		lockscore.Unlock()
-		if k == "projectname" || k == "buyer" || k == "projectcode" || k == "agency" {
+		if k == "projectname" || k == "projectcode" || k == "buyer" || k == "winner" || k == "agency" || k == "buyerperson" || k == "buyertel"{
 			for i, tmpsvalue := range v {
 				//1.长度打分
 				valueLen := utf8.RuneCountInString(fmt.Sprint(tmpsvalue.Value))
 				if valueLen < 1 {
+					v[i].Score = -5
 					continue
 				}
 				if valueLen > 100 {
@@ -141,7 +142,7 @@ func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.Ext
 	}
 
 	for k, v := range tmps { //新打分的结果集放入到result中,v为数组只有2个值
-		if v[0].Score == v[1].Score {//分数相等优先取打分的值
+		if v[0].Score == v[1].Score { //分数相等优先取打分的值
 			if v[0].ExtFrom == "JsonData_"+k {
 				j.Result[k] = append(j.Result[k], v[1])
 			} else {
@@ -149,7 +150,7 @@ func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.Ext
 			}
 			continue
 		}
-		j.Result[k] = append(j.Result[k], v...)//分数不相等就放入result排序
+		j.Result[k] = append(j.Result[k], v...) //分数不相等就放入result排序
 	}
 
 	//结果排序

+ 3 - 3
src/jy/pretreated/analystep.go

@@ -178,8 +178,8 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job) {
 	tablePackage := map[string]*util.BlockPackage{}
 	if tabres.IsMultiPackage {
 		//分包中的map
-		for k, v := range tabres.PackageMap.Map {
-			blockPackage, ok := v.(*util.BlockPackage)
+		for _, v := range tabres.PackageMap.Keys {
+			blockPackage, ok := tabres.PackageMap.Map[v].(*util.BlockPackage)
 			if !ok {
 				continue
 			}
@@ -201,7 +201,7 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job) {
 				blockPackage.TableKV = util.NewJobKv()
 			}
 			MergeKvTags(blockPackage.TableKV.KvTags, GetKvTags(labelKVs, "", nil))
-			tablePackage[k] = blockPackage
+			tablePackage[v] = blockPackage
 		}
 	}
 	//处理中标人排序

+ 39 - 36
src/jy/pretreated/analytable.go

@@ -296,7 +296,7 @@ func (table *Table) KVFilter() {
 			onePkg, _ := table.BlockPackage.Map[onePkgKey].(*u.BlockPackage)
 			if onePkg != nil && onePkg.WinnerOrder != nil && len(onePkg.WinnerOrder) == 0 {
 				onePkg.WinnerOrder = table.WinnerOrder
-				table.BlockPackage.Map[onePkgKey] = onePkg
+				table.BlockPackage.AddKey(onePkgKey, onePkg)
 			}
 		}
 	}
@@ -511,11 +511,11 @@ func (table *Table) MergerToTableresult() {
 	//对多包表格的多包值的合并处理
 	if table.BPackage {
 		table.TableResult.IsMultiPackage = true
-		for k, v := range table.BlockPackage.Map {
-			package1 := table.TableResult.PackageMap.Map[k]
+		for _, v2 := range table.BlockPackage.Keys {
+			package1 := table.TableResult.PackageMap.Map[v2]
 			if package1 == nil {
-				table.TableResult.PackageMap.AddKey(k, v)
-				if vvv, ok := v.(*u.BlockPackage); ok {
+				table.TableResult.PackageMap.AddKey(v2, table.BlockPackage.Map[v2])
+				if vvv, ok := table.BlockPackage.Map[v2].(*u.BlockPackage); ok {
 					if vvv.TableKV != nil && len(vvv.TableKV.KvTags) > 0 {
 						MergeKvTags(table.TableResult.KvTags, vvv.TableKV.KvTags)
 					}
@@ -525,7 +525,7 @@ func (table *Table) MergerToTableresult() {
 				if bp.TableKV == nil {
 					bp.TableKV = u.NewJobKv()
 				}
-				v1 := v.(*u.BlockPackage)
+				v1 := table.BlockPackage.Map[v2].(*u.BlockPackage)
 				if v1.TableKV != nil && len(v1.TableKV.KvTags) > 0 {
 					for k2, v2 := range v1.TableKV.KvTags {
 						if k2 == "" {
@@ -533,7 +533,7 @@ func (table *Table) MergerToTableresult() {
 						}
 						isExists := false
 						for _, v2v := range v2 {
-							if v2v.Value == ""{
+							if v2v.Value == "" {
 								continue
 							}
 							for _, v2vv := range bp.TableKV.KvTags[k2] {
@@ -549,7 +549,7 @@ func (table *Table) MergerToTableresult() {
 						}
 					}
 				}
-				if len(v1.WinnerOrder)>0 && len(bp.WinnerOrder) == 0{
+				if len(v1.WinnerOrder) > 0 && len(bp.WinnerOrder) == 0 {
 					bp.WinnerOrder = v1.WinnerOrder
 				}
 				//table.TableResult.PackageMap.AddKey(k, v)
@@ -585,20 +585,23 @@ func (table *Table) MergerToTableresult() {
 			}
 		}
 	}
-	if table.BlockPackage.Map != nil{
-		for _,v := range table.BlockPackage.Map{
-			if vvv, ok := v.(*u.BlockPackage); ok {
-				if len(vvv.TableKV.KvTags) > 0{
-					for kk,vv := range vvv.TableKV.KvTags{
-						if kk == ""{
-							continue
-						}
-						if len(table.TableResult.KvTags[kk]) == 0 {
-							table.TableResult.KvTags[kk] = vv
+	if table.BlockPackage != nil && len(table.BlockPackage.Keys) > 0 {
+		for _, v := range table.BlockPackage.Keys {
+			if table.BlockPackage.Map[v] != nil {
+				if vvv, ok := table.BlockPackage.Map[v].((*u.BlockPackage)); ok {
+					if vvv.TableKV != nil && len(vvv.TableKV.KvTags) > 0 {
+						for kk, vv := range vvv.TableKV.KvTags {
+							if kk == "" {
+								continue
+							}
+							if len(table.TableResult.KvTags[kk]) == 0 {
+								table.TableResult.KvTags[kk] = vv
+							}
 						}
 					}
 				}
 			}
+
 		}
 	}
 }
@@ -1353,9 +1356,9 @@ func (table *Table) ComputeRowColIsKeyRation() {
 					sv.AddKey(k, v)
 				}
 				if len(sv.Keys) > 0 {
-					for k1, v1 := range sv.Map {
-						if tr.TDs[0].SortKV.Map[k1] == nil {
-							table.SortKV.AddKey(k1, v1)
+					for _, v1 := range sv.Keys {
+						if tr.TDs[0].SortKV.Map[v1] == nil {
+							table.SortKV.AddKey(v1, sv.Map[v1])
 						}
 					}
 				} else if table.Tag == "" && k == 0 && len(tr.TDs[0].Val) > 11 {
@@ -1435,11 +1438,11 @@ func (table *Table) FindKV() {
 								bodirect = bo
 							}
 							if len(td.SortKV.Map) > 0 {
-								for tdk, tdv := range td.SortKV.Map {
-									if tdv == nil || tdv == "" { //value为空或者null不再添加到table.SortKV
+								for _, tdv := range td.SortKV.Keys {
+									if  tdv == "" || td.SortKV.Map[tdv] == ""{ //value为空或者null不再添加到table.SortKV
 										continue
 									}
-									table.SortKV.AddKey(tdk, tdv)
+									table.SortKV.AddKey(tdv, td.SortKV.Map[tdv])
 								}
 							}
 						}
@@ -1598,7 +1601,7 @@ func GetBidOrder(td *TD, direct, n int) (d int, res bool) {
 						"sort":    GetBidSort(td.Val, n),
 					})
 					res = true
-					td.TR.Table.SortKV.Map[NullTxtBid] = a1
+					td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
 				}
 			}
 		}
@@ -1625,7 +1628,7 @@ func GetBidOrder(td *TD, direct, n int) (d int, res bool) {
 						"sort":    GetBidSort(td.Val, n),
 					})
 					res = true
-					td.TR.Table.SortKV.Map[NullTxtBid] = a1
+					td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
 				}
 			}
 		}
@@ -1647,7 +1650,7 @@ func GetBidOrder(td *TD, direct, n int) (d int, res bool) {
 				"sort":    GetBidSort(td.Val, n),
 			})
 			res = true
-			td.TR.Table.SortKV.Map[NullTxtBid] = a1
+			td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
 			//} else if ((btd != nil && !btd.BH && btd.Valtype == "BO") || direct == 2) && rtd != nil && filter_zbdw_v.MatchString(rtd.Val) {
 		} else if ((btd != nil && !btd.BH) || direct == 2) && rtd != nil && filter_zbdw_v2.MatchString(rtd.Val) {
 			d = 2
@@ -1663,7 +1666,7 @@ func GetBidOrder(td *TD, direct, n int) (d int, res bool) {
 				"sort":    GetBidSort(td.Val, n),
 			})
 			res = true
-			td.TR.Table.SortKV.Map[NullTxtBid] = a1
+			td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
 		}
 	}
 	return
@@ -1970,9 +1973,9 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
 		tn.isGoonNext()
 	}
 	//查找分包中的中标人排序
-	if tn.BlockPackage != nil && tn.BlockPackage.Map != nil && len(tn.BlockPackage.Map) > 0 {
-		for _, v := range tn.BlockPackage.Map {
-			vv := v.(*u.BlockPackage)
+	if tn.BlockPackage != nil && tn.BlockPackage.Keys != nil && len(tn.BlockPackage.Keys) > 0 {
+		for _, v := range tn.BlockPackage.Keys {
+			vv := tn.BlockPackage.Map[v].(*u.BlockPackage)
 			if vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0 {
 				vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2)
 			}
@@ -1986,8 +1989,8 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int)
 	if len(index) == 1 { //是一个的情况
 		if len(tn.SortKV.Keys) < 10 && tn.ColNum < 10 && tn.RowNum < 4 { //table带排序的KV值小于10并且小于10列和小于4行
 			beq := true
-			for _, v2 := range tn.SortKV.Map {
-				if _, ok := v2.(string); !ok {
+			for _, v2 := range tn.SortKV.Keys {
+				if _, ok := tn.SortKV.Map[v2].(string); !ok {
 					beq = false
 					break
 				}
@@ -1995,8 +1998,8 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int)
 			if beq { //统一处理为数组
 				td := tn.GetTdByRCNo(tn.RowNum-1, 0)
 				if !td.BH && FindVal2_1.MatchString(td.Val) {
-					for k2, v2 := range tn.SortKV.Map {
-						tn.SortKV.Map[k2] = []string{v2.(string)}
+					for _, v2 := range tn.SortKV.Keys {
+						tn.SortKV.AddKey(v2,[]string{tn.SortKV.Map[v2].(string)})
 					}
 				} else {
 					//没有处理成数组的情况下,继续调用正文查找分包的方法
@@ -2319,7 +2322,7 @@ func (tn *Table) assemblePackage(k1, v1, key string) {
 	if !excludeKey.MatchString(k1) {
 		bp.Text += fmt.Sprintf("%v:%v\n", k1, v1)
 	}
-	tn.BlockPackage.Map[key] = bp
+	tn.BlockPackage.AddKey(key, bp)
 }
 
 /**

+ 8 - 8
src/jy/pretreated/tablev2.go

@@ -265,12 +265,12 @@ func (td *TD) tdHasTable(bsontable *bool, tr *TR) {
 			if sonts.IsMultiPackage {
 				td.TR.Table.BPackage = true
 				tb1 := td.TR.Table.BlockPackage
-				for k, v := range sonts.PackageMap.Map {
-					v1 := v.(*u.BlockPackage)
-					if tb1.Map[k] == nil {
-						tb1.AddKey(k, v)
+				for _, v := range sonts.PackageMap.Keys {
+					v1 := sonts.PackageMap.Map[v].(*u.BlockPackage)
+					if tb1.Map[v] == nil {
+						tb1.AddKey(v, sonts.PackageMap.Map[v])
 					} else {
-						bp := tb1.Map[k].(*u.BlockPackage)
+						bp := tb1.Map[v].(*u.BlockPackage)
 						if bp != nil && v1.TableKV != nil {
 							for k2, v2 := range v1.TableKV.KvTags {
 								if bp.TableKV == nil {
@@ -361,7 +361,7 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
 				}
 				MergeKvTags(bp.TableKV.KvTags, bp_v.ColonKV.KvTags)
 				MergeKvTags(bp.TableKV.KvTags, bp_v.SpaceKV.KvTags)
-				table.TableResult.PackageMap.Map[bp_k] = bp
+				table.TableResult.PackageMap.AddKey(bp_k, bp)
 			}
 		}
 	}
@@ -430,8 +430,8 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
 		*/
 
 		fSortKV := FindKv(td.Val, "", 2)
-		for k, v := range fSortKV.Map {
-			td.SortKV.AddKey(k, v)
+		for _, v := range fSortKV.Keys {
+			td.SortKV.AddKey(v, fSortKV.Map[v])
 		}
 		//		td.LeftNode.Val
 		//		for _, vvv := range *td.TR {