zhangjinkun 6 years ago
parent
commit
8f3dff1533
3 changed files with 42 additions and 46 deletions
  1. 18 18
      src/jy/extract/extract.go
  2. 14 28
      src/jy/extract/score.go
  3. 10 0
      src/jy/util/util.go

+ 18 - 18
src/jy/extract/extract.go

@@ -443,9 +443,6 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
 		}
 		//函数清理
 		for key, val := range j.Result {
-			tmpExtFields := make([]*ju.ExtField, 0)
-			tmpWeight := -999 //记录最大权重
-			tmpIndex := -999  //记录最大权重下标
 			for _, v := range val {
 				lockclear.Lock()
 				cfn := e.ClearFn[key]
@@ -470,21 +467,24 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
 				lockclear.Unlock()
 			}
 			//项目编号,采购单位权重清理
-			if (key == "projectcode" || key == "buyer") && len(val) > 1 {
-				for i, v := range val {
-					if v.Weight == 0 {
-						tmpExtFields = append(tmpExtFields, v)
-						continue
-					} else if v.Weight > tmpWeight {
-						tmpWeight = v.Weight
-						tmpIndex = i
-					}
-				}
-				if tmpIndex != -999 {
-					tmpExtFields = append(tmpExtFields, val[tmpIndex])
-					j.Result[key] = tmpExtFields
-				}
-			}
+			//          tmpExtFields := make([]*ju.ExtField, 0)
+			//			tmpWeight := -999 //记录最大权重
+			//			tmpIndex := -999  //记录最大权重下标
+			//			if (key == "projectcode" || key == "buyer") && len(val) > 1 {
+			//				for i, v := range val {
+			//					if v.Weight == 0 {
+			//						tmpExtFields = append(tmpExtFields, v)
+			//						continue
+			//					} else if v.Weight > tmpWeight {
+			//						tmpWeight = v.Weight
+			//						tmpIndex = i
+			//					}
+			//				}
+			//				if tmpIndex != -999 {
+			//					tmpExtFields = append(tmpExtFields, val[tmpIndex])
+			//					j.Result[key] = tmpExtFields
+			//				}
+			//			}
 		}
 		PackageDetail(j, e) //处理分包信息
 		//		bs, _ := json.Marshal(j.Result)

+ 14 - 28
src/jy/extract/score.go

@@ -136,7 +136,7 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 						qz = TagConfig[key][field]
 					}
 				}
-				tmps[tmpsindex].Score += BlockScore * qz //乘以权重系数
+				tmps[tmpsindex].Score += ju.FloatFormat(BlockScore*qz, 4) //乘以权重系数
 				tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "匹配段标签权重", Code: "权重系数乘以2", RuleText: "BlockTag", ScoreFrom: "tagscore.json", Value: tmpsvalue.Value, Score: BlockScore * qz})
 			} else {
 				//没有段标签,走其他
@@ -151,39 +151,24 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 					tmps[tmpsindex].Score += fieldscore["title"]
 					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "title初始化", Code: "fieldscore.title", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: fieldscore["title"]})
 				}
-				if strings.Contains(tmpsvalue.Type, "colon") {
-					tmps[tmpsindex].Score += fieldscore["colon"]
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "colonkv", Code: "fieldscore.colon", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: fieldscore["colon"]})
-				} else if strings.Contains(tmpsvalue.Type, "space") {
-					tmps[tmpsindex].Score += fieldscore["space"]
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "spacekv", Code: "fieldscore.space", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: fieldscore["space"]})
-				} else if strings.Contains(tmpsvalue.Type, "table") {
-					tmps[tmpsindex].Score += fieldscore["table"]
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "tablekv", Code: "fieldscore.table", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: fieldscore["table"]})
-				} else if strings.Contains(tmpsvalue.Type, "regexp") {
-					tmps[tmpsindex].Score += fieldscore["regexp"]
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: "fieldscore.regexp", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: fieldscore["regexp"]})
-				}
+				tmps[tmpsindex].Score += fieldscore[tmpsvalue.Type]
+				tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: tmpsvalue.Type, Code: "fieldscore." + tmpsvalue.Type, RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: fieldscore[tmpsvalue.Type]})
 			} else { //通用抽取属性打分配置
 				if tmpsvalue.ExtFrom == "title" { //标题打分初始化
 					tmps[tmpsindex].Score += CommonScore["title"]
 					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "title初始化", Code: "fieldscore.title", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: CommonScore["title"]})
 				}
-				if strings.Contains(tmpsvalue.Type, "colon") {
-					tmps[tmpsindex].Score += CommonScore["colon"]
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "colonkv", Code: "fieldscore.colon", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: CommonScore["colon"]})
-				} else if strings.Contains(tmpsvalue.Type, "space") {
-					tmps[tmpsindex].Score += CommonScore["space"]
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "spacekv", Code: "fieldscore.space", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: CommonScore["space"]})
-				} else if strings.Contains(tmpsvalue.Type, "table") {
-					tmps[tmpsindex].Score += CommonScore["table"]
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "tablekv", Code: "fieldscore.table", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: CommonScore["table"]})
-				} else if strings.Contains(tmpsvalue.Type, "regexp") {
-					tmps[tmpsindex].Score += CommonScore["regexp"]
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: "fieldscore.regexp", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: CommonScore["regexp"]})
-				}
+				tmps[tmpsindex].Score += CommonScore[tmpsvalue.Type]
+				tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: tmpsvalue.Type, Code: "fieldscore." + tmpsvalue.Type, RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: CommonScore[tmpsvalue.Type]})
+			}
+			//kv权重打分
+			if tmpsvalue.Type == "colon" || tmpsvalue.Type == "space" || tmpsvalue.Type == "table" {
+				weightscore := ju.FloatFormat(1+float64(tmps[tmpsindex].Weight)/1000, 4)
+				tmps[tmpsindex].Score += weightscore
+				tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "kv权重打分", Code: "kv-weight", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: weightscore})
+			} else {
+				//正则权重,暂不考虑
 			}
-
 			scoreRule := SoreConfig[field]
 			if scoreRule == nil {
 				continue
@@ -324,6 +309,7 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 				v.Score += score
 				tmps[index].ScoreItem = append(tmps[index].ScoreItem, &ju.ScoreItem{Des: "重复次数打分repeat", Code: field + ".repeat", RuleText: "repeat:" + fmt.Sprint(v.ValRepeat), ScoreFrom: "fieldscore.json." + field, Value: v.Value, Score: score})
 			}
+			v.Score = ju.FloatFormat(v.Score, 4)
 		}
 	}
 	return result

+ 10 - 0
src/jy/util/util.go

@@ -4,6 +4,7 @@ import (
 	"fmt"
 	. "jy/mongodbutil"
 	qu "qfw/util"
+	"strconv"
 
 	. "gopkg.in/mgo.v2/bson"
 )
@@ -137,3 +138,12 @@ func InitBrand() {
 	BrandGet = &DFA{}
 	BrandGet.AddWord(BrandConfig...)
 }
+
+// FloatFormat rounds val to length decimal places and returns the result as a
+// float64. The rounding is done by formatting val with strconv.FormatFloat
+// (format 'f', precision length) and parsing the text back, so it follows
+// strconv's decimal rounding rather than binary arithmetic on val.
+//
+// If the formatted text cannot be parsed back, val is returned unrounded
+// instead of being replaced by 0, so a caller accumulating scores does not
+// silently lose the contribution.
+func FloatFormat(val float64, length int) float64 {
+	rounded, err := strconv.ParseFloat(strconv.FormatFloat(val, 'f', length, 64), 64)
+	if err != nil {
+		// NOTE(review): not expected to happen for FormatFloat output; confirm
+		// that no caller relied on the previous 0 fallback.
+		return val
+	}
+	return rounded
+}