|
@@ -5,6 +5,7 @@ import (
|
|
|
"fmt"
|
|
|
ju "jy/util"
|
|
|
"log"
|
|
|
+ "qfw/common/src/qfw/util"
|
|
|
qu "qfw/util"
|
|
|
"regexp"
|
|
|
"strconv"
|
|
@@ -93,14 +94,16 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
if len(tmpsvalue.BlockTag) > 0 {
|
|
|
//有标签段
|
|
|
var qz float64 = 0.0 //取权重最高的
|
|
|
+ var tgk string
|
|
|
for key := range tmpsvalue.BlockTag {
|
|
|
//key = "其他"//TODO 测试用
|
|
|
if TagConfig[key][field] > qz {
|
|
|
qz = TagConfig[key][field]
|
|
|
+ tgk = key
|
|
|
}
|
|
|
}
|
|
|
tmps[tmpsindex].Score += 2 * qz //乘以权重系数
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "匹配段标签权重", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: 2 * qz})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "匹配段标签权重", Code: "权重系数乘以2", RuleText: "BlockTag", Type: tgk+field, ExtFrom: "tagscore.json", Value: tmpsvalue.Value, Score: 2 * qz})
|
|
|
} else {
|
|
|
//没有段标签,走其他
|
|
|
//qz := TagConfig["其他"][field]
|
|
@@ -110,24 +113,24 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
//是否有kv值
|
|
|
if strings.Contains(tmpsvalue.Type, "colon") {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["colon"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "colonkv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["colon"])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "colonkv", Code: "fieldscore.colon", RuleText: util.ObjToString(SoreConfig["extractype"]["describe"]), Type: "colonkv", ExtFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["colon"])})
|
|
|
} else if strings.Contains(tmpsvalue.Type, "space") {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["space"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "spacekv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["space"])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "spacekv", Code: "fieldscore.space", RuleText: util.ObjToString(SoreConfig["extractype"]["describe"]), Type: "spacekv", ExtFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["space"])})
|
|
|
} else if strings.Contains(tmpsvalue.Type, "table") {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["table"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "tablekv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["table"])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "tablekv", Code: "fieldscore.table", RuleText: util.ObjToString(SoreConfig["extractype"]["describe"]), Type: "tablekv", ExtFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["table"])})
|
|
|
}
|
|
|
}
|
|
|
if tmpsvalue.ExtFrom != "title" { //非标题抽取
|
|
|
if strings.Contains(tmpsvalue.Type, "regexp") {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["regexp"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["regexp"])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: "fieldscore.regexp", RuleText: util.ObjToString(SoreConfig["extractype"]["describe"]), Type: "regexp", ExtFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["regexp"])})
|
|
|
}
|
|
|
} else {
|
|
|
if strings.Contains(tmpsvalue.Type, "regexp") {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["regexp"])+ 1
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["regexp"])+ 1 })
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: "fieldscore.regexp", RuleText: util.ObjToString(SoreConfig["extractype"]["describe"]), Type: "regexp", ExtFrom:"fieldscore.json", Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["regexp"])+ 1 })
|
|
|
}
|
|
|
}
|
|
|
scoreRule := SoreConfig[field]
|
|
@@ -142,7 +145,7 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
}
|
|
|
if valueLen > 100 && field != "projectscope" {
|
|
|
tmps[tmpsindex].Score = -99
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: `valueLen > 100 && field != "projectscope"直接-99分`, Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: -99})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: `valueLen > 100 && field != "projectscope"直接-99分`, Code: field, Type: "length", Value: tmpsvalue.Value, Score: -99})
|
|
|
}
|
|
|
if lengths, ok := scoreRule["length"].([]interface{}); ok {
|
|
|
for _, tmp := range lengths {
|
|
@@ -155,13 +158,13 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
}
|
|
|
if valueLen < min {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(scores[0])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[0])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: fmt.Sprint(valueLen,"<",min), Type: field, ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[0])})
|
|
|
} else if valueLen > max {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(scores[2])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: fmt.Sprint(valueLen,">",max), Type: field, ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
|
|
|
} else {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(scores[1])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: fmt.Sprint(valueLen,">",min,"&&",valueLen,"<",max), Type: field, ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -175,7 +178,7 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
reg := p["regexp"].(*regexp.Regexp)
|
|
|
if reg.MatchString(qu.ObjToString(tmpsvalue.Value)) {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(p["score"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "负面词打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(p["score"])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "负面词打分"+fmt.Sprint(p["describe"]), Code:field+".negativewords" , RuleText: reg.String(), Type: "regexp", ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(p["score"])})
|
|
|
}
|
|
|
}
|
|
|
}, func(err interface{}) {
|
|
@@ -193,7 +196,7 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
reg := p["regexp"].(*regexp.Regexp)
|
|
|
if reg.MatchString(qu.ObjToString(tmpsvalue.Value)) {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(p["score"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "正面词打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(p["score"])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "正面词打分"+fmt.Sprint(p["describe"]), Code: field+".positivewords", RuleText: reg.String(), Type: "regexp", ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(p["score"])})
|
|
|
}
|
|
|
}
|
|
|
}, func(err interface{}) {
|
|
@@ -211,7 +214,7 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
reg := p["regexp"].(*regexp.Regexp)
|
|
|
if reg.MatchString(qu.ObjToString(tmpsvalue.Value)) {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(p["score"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "winnerorder", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(p["score"])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "位置打分winnerorder"+fmt.Sprint(p["describe"]), Code: field+".winnerorder", RuleText:reg.String(), Type: "regexp", ExtFrom:"fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(p["score"])})
|
|
|
}
|
|
|
}
|
|
|
}, func(err interface{}) {
|
|
@@ -232,13 +235,13 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
}
|
|
|
if val < min && 0 < val {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(scores[0])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[0])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code:field+".float", RuleText: fmt.Sprint(val ,"<",min,"&&",0,"<",val), ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[0])})
|
|
|
} else if val > max {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(scores[2])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code: field+".float", RuleText: fmt.Sprint(val,">",max), ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
|
|
|
} else if val <= max && val >= min {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(scores[1])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code:field+".float", RuleText: fmt.Sprintln(val,"<=", max,"&&", val,">=", min ), ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
|
|
|
}
|
|
|
}
|
|
|
//其他打分配置
|
|
@@ -253,10 +256,10 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
}
|
|
|
if val > max {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(scores[2])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "其他打分配置decimal", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "其他打分配置decimal", Code: field+".decimal", RuleText: fmt.Sprint(val ,">", max), ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
|
|
|
} else if val <= max && val > min {
|
|
|
tmps[tmpsindex].Score += qu.Float64All(scores[1])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "其他打分配置decimal", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "其他打分配置decimal", Code: field+".decimal", RuleText:fmt.Sprint(val ,"<=", max ,"&&", val,">", min), ExtFrom:"fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
|
|
|
}
|
|
|
}
|
|
|
}
|