|
@@ -89,9 +89,6 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
qu.Catch()
|
|
|
for field, tmps := range result {
|
|
|
for tmpsindex, tmpsvalue := range tmps {
|
|
|
- if tmpsvalue.ExtFrom == "title"{
|
|
|
- continue
|
|
|
- }
|
|
|
//是否有段标签
|
|
|
if len(tmpsvalue.BlockTag) > 0 {
|
|
|
//有标签段
|
|
@@ -109,25 +106,22 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
//qz := TagConfig["其他"][field]
|
|
|
//tmps[tmpsindex].Score += 2 * qz //乘以权重系数
|
|
|
}
|
|
|
-
|
|
|
- //是否有kv值
|
|
|
- if strings.Contains(tmpsvalue.Type, "colon") {
|
|
|
- tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["colon"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "colonkv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["colon"])})
|
|
|
- } else if strings.Contains(tmpsvalue.Type, "space") {
|
|
|
- tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["space"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "spacekv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["space"])})
|
|
|
- } else if strings.Contains(tmpsvalue.Type, "table") {
|
|
|
- tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["table"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "tablekv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["table"])})
|
|
|
- }
|
|
|
-
|
|
|
- //正则
|
|
|
- if strings.Contains(tmpsvalue.Type, "regexp") {
|
|
|
- tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["regexp"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["regexp"])})
|
|
|
+ if tmpsvalue.ExtFrom != "title" { //非标题抽取
|
|
|
+ //是否有kv值
|
|
|
+ if strings.Contains(tmpsvalue.Type, "colon") {
|
|
|
+ tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["colon"])
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "colonkv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["colon"])})
|
|
|
+ } else if strings.Contains(tmpsvalue.Type, "space") {
|
|
|
+ tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["space"])
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "spacekv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["space"])})
|
|
|
+ } else if strings.Contains(tmpsvalue.Type, "table") {
|
|
|
+ tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["table"])
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "tablekv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["table"])})
|
|
|
+ } else if strings.Contains(tmpsvalue.Type, "regexp") {
|
|
|
+ tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["regexp"])
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["regexp"])})
|
|
|
+ }
|
|
|
}
|
|
|
-
|
|
|
scoreRule := SoreConfig[field]
|
|
|
if scoreRule == nil {
|
|
|
continue
|
|
@@ -159,7 +153,7 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
|
|
|
} else {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(scores[1])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score:qu.Float64All(scores[1])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
|
|
|
}
|
|
|
}
|
|
|
}
|