zhangjinkun 6 سال پیش
والد
کامیت
170c513197
1 فایل تغییر یافته به همراه 16 افزوده شده و 22 حذف شده
  1. 16 22
      src/jy/extract/score.go

+ 16 - 22
src/jy/extract/score.go

@@ -89,9 +89,6 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 	qu.Catch()
 	for field, tmps := range result {
 		for tmpsindex, tmpsvalue := range tmps {
-			if tmpsvalue.ExtFrom == "title"{
-				continue
-			}
 			//是否有段标签
 			if len(tmpsvalue.BlockTag) > 0 {
 				//有标签段
@@ -109,25 +106,22 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 				//qz := TagConfig["其他"][field]
 				//tmps[tmpsindex].Score += 2 * qz //乘以权重系数
 			}
-
-			//是否有kv值
-			if strings.Contains(tmpsvalue.Type, "colon") {
-				tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["colon"])
-				tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "colonkv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["colon"])})
-			} else if strings.Contains(tmpsvalue.Type, "space") {
-				tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["space"])
-				tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "spacekv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["space"])})
-			} else if strings.Contains(tmpsvalue.Type, "table") {
-				tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["table"])
-				tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "tablekv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["table"])})
-			}
-
-			//正则
-			if strings.Contains(tmpsvalue.Type, "regexp") {
-				tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["regexp"])
-				tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["regexp"])})
+			if tmpsvalue.ExtFrom != "title" { //非标题抽取
+				//是否有kv值
+				if strings.Contains(tmpsvalue.Type, "colon") {
+					tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["colon"])
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "colonkv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["colon"])})
+				} else if strings.Contains(tmpsvalue.Type, "space") {
+					tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["space"])
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "spacekv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["space"])})
+				} else if strings.Contains(tmpsvalue.Type, "table") {
+					tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["table"])
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "tablekv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["table"])})
+				} else if strings.Contains(tmpsvalue.Type, "regexp") {
+					tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["regexp"])
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["regexp"])})
+				}
 			}
-
 			scoreRule := SoreConfig[field]
 			if scoreRule == nil {
 				continue
@@ -159,7 +153,7 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 								tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
 							} else {
 								tmps[tmpsindex].Score += qu.Float64All(scores[1])
-								tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score:qu.Float64All(scores[1])})
+								tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
 							}
 						}
 					}