|
@@ -9,10 +9,12 @@ import (
|
|
|
"regexp"
|
|
|
"strconv"
|
|
|
"strings"
|
|
|
+ "sync"
|
|
|
"unicode/utf8"
|
|
|
)
|
|
|
|
|
|
var (
|
|
|
+ lockscore sync.RWMutex
|
|
|
SoreConfig map[string]map[string]interface{}
|
|
|
TagConfig map[string]map[string]float64
|
|
|
TagConfigDesc map[string]string
|
|
@@ -120,21 +122,28 @@ func init() {
|
|
|
}
|
|
|
|
|
|
//结果打分
|
|
|
-func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
+func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
|
|
|
qu.Catch()
|
|
|
result := j.Result
|
|
|
for field, tmps := range result {
|
|
|
+ locktag.Lock()
|
|
|
+ taglength := len(ftag[field])
|
|
|
+ locktag.Unlock()
|
|
|
for tmpsindex, tmpsvalue := range tmps {
|
|
|
+ lockscore.Lock()
|
|
|
describe := qu.ObjToString(SoreConfig["extractype"]["describe"])
|
|
|
+ lockscore.Unlock()
|
|
|
//是否有段标签
|
|
|
if len(tmpsvalue.BlockTag) > 0 {
|
|
|
//有标签段
|
|
|
var qz float64 = 0.0 //取权重最高的
|
|
|
for key := range tmpsvalue.BlockTag {
|
|
|
//key = "其他"//TODO 测试用
|
|
|
+ lockscore.Lock()
|
|
|
if TagConfig[key][field] > qz {
|
|
|
qz = TagConfig[key][field]
|
|
|
}
|
|
|
+ lockscore.Unlock()
|
|
|
}
|
|
|
tmps[tmpsindex].Score += ju.FloatFormat(BlockScore*qz, 4) //乘以权重系数
|
|
|
tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "匹配段标签权重", Code: "权重系数乘以2", RuleText: "BlockTag", ScoreFrom: "tagscore.json", Value: tmpsvalue.Value, Score: BlockScore * qz})
|
|
@@ -145,31 +154,49 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
}
|
|
|
|
|
|
//抽取类型打分
|
|
|
- if FieldsScore[field] != nil { //指定抽取属性打分配置
|
|
|
- fieldscore := FieldsScore[field]
|
|
|
+ lockscore.Lock()
|
|
|
+ fieldscore := FieldsScore[field]
|
|
|
+ typescore := float64(0)
|
|
|
+ titlescore := float64(0)
|
|
|
+ if fieldscore != nil { //指定抽取属性打分配置
|
|
|
if tmpsvalue.ExtFrom == "title" { //标题打分初始化
|
|
|
- tmps[tmpsindex].Score += fieldscore["title"]
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "title初始化", Code: "fieldscore.title", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: fieldscore["title"]})
|
|
|
+ titlescore = fieldscore["title"]
|
|
|
}
|
|
|
- tmps[tmpsindex].Score += fieldscore[tmpsvalue.Type]
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: tmpsvalue.Type, Code: "fieldscore." + tmpsvalue.Type, RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: fieldscore[tmpsvalue.Type]})
|
|
|
+ typescore = fieldscore[tmpsvalue.Type]
|
|
|
} else { //通用抽取属性打分配置
|
|
|
if tmpsvalue.ExtFrom == "title" { //标题打分初始化
|
|
|
- tmps[tmpsindex].Score += CommonScore["title"]
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "title初始化", Code: "fieldscore.title", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: CommonScore["title"]})
|
|
|
+ titlescore = CommonScore["title"]
|
|
|
}
|
|
|
- tmps[tmpsindex].Score += CommonScore[tmpsvalue.Type]
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: tmpsvalue.Type, Code: "fieldscore." + tmpsvalue.Type, RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: CommonScore[tmpsvalue.Type]})
|
|
|
+ typescore = CommonScore[tmpsvalue.Type]
|
|
|
}
|
|
|
+ lockscore.Unlock()
|
|
|
+
|
|
|
+ tmps[tmpsindex].Score += titlescore
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "title初始化", Code: "fieldscore.title", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: titlescore})
|
|
|
+ tmps[tmpsindex].Score += typescore
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: tmpsvalue.Type, Code: "fieldscore." + tmpsvalue.Type, RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: typescore})
|
|
|
+
|
|
|
//kv权重打分
|
|
|
- if tmpsvalue.Type == "colon" || tmpsvalue.Type == "space" || tmpsvalue.Type == "table" {
|
|
|
- weightscore := ju.FloatFormat(1+float64(tmps[tmpsindex].Weight)/1000, 4)
|
|
|
- tmps[tmpsindex].Score += weightscore
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "kv权重打分", Code: "kv-weight", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: weightscore})
|
|
|
+ if fieldscore != nil { //指定抽取属性打分配置
|
|
|
+ if tmpsvalue.Type == "colon" || tmpsvalue.Type == "space" || tmpsvalue.Type == "table" {
|
|
|
+ weightscore := ju.FloatFormat(float64(qu.Float64All(fieldscore["kvweight"]))+float64(tmps[tmpsindex].Weight)/float64(taglength), 4)
|
|
|
+ tmps[tmpsindex].Score += weightscore
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "kv权重打分", Code: "kv-weight", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: weightscore})
|
|
|
+ } else {
|
|
|
+ //正则权重,暂不考虑
|
|
|
+ }
|
|
|
} else {
|
|
|
- //正则权重,暂不考虑
|
|
|
+ if tmpsvalue.Type == "colon" || tmpsvalue.Type == "space" || tmpsvalue.Type == "table" {
|
|
|
+ weightscore := ju.FloatFormat(float64(qu.Float64All(CommonScore["kvweight"]))+float64(tmps[tmpsindex].Weight)/float64(taglength), 4)
|
|
|
+ tmps[tmpsindex].Score += weightscore
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "kv权重打分", Code: "kv-weight", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: weightscore})
|
|
|
+ } else {
|
|
|
+ //正则权重,暂不考虑
|
|
|
+ }
|
|
|
}
|
|
|
+ lockscore.Lock()
|
|
|
scoreRule := SoreConfig[field]
|
|
|
+ lockscore.Unlock()
|
|
|
if scoreRule == nil {
|
|
|
continue
|
|
|
}
|