|
@@ -12,16 +12,25 @@ import (
|
|
|
"unicode/utf8"
|
|
|
)
|
|
|
|
|
|
-var SoreConfig map[string]map[string]interface{}
|
|
|
-var TagConfig map[string]map[string]float64
|
|
|
-var TagConfigDesc map[string]string
|
|
|
-var TitleScore float64
|
|
|
+var ( // package-level scoring configuration; the values are filled in by init()
|
|
|
+ SoreConfig map[string]map[string]interface{} // loaded from ./res/fieldscore.json; NOTE(review): name looks like a typo of "ScoreConfig" — exported, so confirm callers before renaming
|
|
|
+ TagConfig map[string]map[string]float64 // loaded from ./res/tagscore.json; looked up as TagConfig[key][field] to get a block-tag weight
|
|
|
+ TagConfigDesc map[string]string // loaded from ./res/tagscoredesc.json
|
|
|
+
|
|
|
+ TitleScore, RepeatScore, BlockScore float64 // read from SoreConfig: ["extractype"]["title"], ["other"]["repeat"]["score"] (multiplied by the repeat count), ["other"]["block"]["score"] (multiplied by the block-tag weight)
|
|
|
+)
|
|
|
|
|
|
func init() {
|
|
|
qu.ReadConfig("./res/tagscoredesc.json", &TagConfigDesc)
|
|
|
qu.ReadConfig("./res/tagscore.json", &TagConfig)
|
|
|
qu.ReadConfig("./res/fieldscore.json", &SoreConfig)
|
|
|
TitleScore = qu.Float64All(SoreConfig["extractype"]["title"])
|
|
|
+ if repeat, ok := SoreConfig["other"]["repeat"].(map[string]interface{}); ok {
|
|
|
+ RepeatScore = qu.Float64All(repeat["score"])
|
|
|
+ }
|
|
|
+ if block, ok := SoreConfig["other"]["block"].(map[string]interface{}); ok {
|
|
|
+ BlockScore = qu.Float64All(block["score"])
|
|
|
+ }
|
|
|
//实例化正则
|
|
|
for _, tmp := range SoreConfig {
|
|
|
//log.Println(tmp)
|
|
@@ -89,8 +98,8 @@ func init() {
|
|
|
|
|
|
// ScoreFields scores the extracted field candidates held in j.Result and returns that result map.
|
|
|
func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
- result := j.Result
|
|
|
qu.Catch()
|
|
|
+ result := j.Result
|
|
|
for field, tmps := range result {
|
|
|
for tmpsindex, tmpsvalue := range tmps {
|
|
|
if tmpsvalue.ExtFrom == "title" { //标题打分初始化
|
|
@@ -107,8 +116,8 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
qz = TagConfig[key][field]
|
|
|
}
|
|
|
}
|
|
|
- tmps[tmpsindex].Score += 2 * qz //乘以权重系数
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "匹配段标签权重", Code: "权重系数乘以2", RuleText: "BlockTag", ScoreFrom: "tagscore.json", Value: tmpsvalue.Value, Score: 2 * qz})
|
|
|
+ tmps[tmpsindex].Score += BlockScore * qz //乘以权重系数
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "匹配段标签权重", Code: "权重系数乘以2", RuleText: "BlockTag", ScoreFrom: "tagscore.json", Value: tmpsvalue.Value, Score: BlockScore * qz})
|
|
|
} else {
|
|
|
//没有段标签,走其他
|
|
|
//qz := TagConfig["其他"][field]
|
|
@@ -146,21 +155,18 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
if lengths, ok := scoreRule["length"].([]interface{}); ok {
|
|
|
for _, tmp := range lengths {
|
|
|
if length, ok := tmp.(map[string]interface{}); ok {
|
|
|
- min := qu.IntAll(length["min"])
|
|
|
- max := qu.IntAll(length["max"])
|
|
|
- scores, _ := length["score"].([]interface{})
|
|
|
- if len(scores) < 3 {
|
|
|
- continue
|
|
|
- }
|
|
|
- if valueLen < min {
|
|
|
- tmps[tmpsindex].Score += qu.Float64All(scores[0])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: fmt.Sprint(valueLen, "<", min), ScoreFrom: "fieldscore.json.length", Value: tmpsvalue.Value, Score: qu.Float64All(scores[0])})
|
|
|
- } else if valueLen > max {
|
|
|
- tmps[tmpsindex].Score += qu.Float64All(scores[2])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: fmt.Sprint(valueLen, ">", max), ScoreFrom: "fieldscore.json.length", Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
|
|
|
- } else {
|
|
|
- tmps[tmpsindex].Score += qu.Float64All(scores[1])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: fmt.Sprint(valueLen, ">", min, "&&", valueLen, "<", max), ScoreFrom: "fieldscore.json.length", Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
|
|
|
+ if ranges, ok := length["range"].([]interface{}); ok {
|
|
|
+ gt := qu.IntAll(ranges[0])
|
|
|
+ lte := qu.IntAll(ranges[1])
|
|
|
+ if lte < 0 { //∞
|
|
|
+ lte = 999999
|
|
|
+ }
|
|
|
+ score := qu.Float64All(ranges[2])
|
|
|
+ if valueLen > gt && valueLen <= lte {
|
|
|
+ tmps[tmpsindex].Score += score
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: fmt.Sprint(gt, "<", valueLen, "<=", lte), ScoreFrom: "fieldscore.json.length", Value: tmpsvalue.Value, Score: score})
|
|
|
+ break
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -259,6 +265,19 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+ //计算重复值,并加分=重复数量*乘系数
|
|
|
+ valrepeat := map[string]int{}
|
|
|
+ for _, v := range tmps {
|
|
|
+ valrepeat[fmt.Sprint(v.Value)] += 1
|
|
|
+ }
|
|
|
+ for index, v := range tmps {
|
|
|
+ v.ValRepeat = valrepeat[fmt.Sprint(v.Value)] - 1
|
|
|
+ if v.ValRepeat > 0 {
|
|
|
+ score := RepeatScore * float64(v.ValRepeat)
|
|
|
+ v.Score += score
|
|
|
+ tmps[index].ScoreItem = append(tmps[index].ScoreItem, &ju.ScoreItem{Des: "重复次数打分repeat", Code: field + ".repeat", RuleText: "repeat:" + fmt.Sprint(v.ValRepeat), ScoreFrom: "fieldscore.json." + field, Value: v.Value, Score: score})
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
return result
|
|
|
}
|