|
@@ -17,20 +17,43 @@ var (
|
|
|
TagConfig map[string]map[string]float64
|
|
|
TagConfigDesc map[string]string
|
|
|
|
|
|
- TitleScore, RepeatScore, BlockScore float64
|
|
|
+ RepeatScore, BlockScore float64
|
|
|
+ CommonScore map[string]float64
|
|
|
+ FieldsScore map[string]map[string]float64
|
|
|
)
|
|
|
|
|
|
func init() {
|
|
|
qu.ReadConfig("./res/tagscoredesc.json", &TagConfigDesc)
|
|
|
qu.ReadConfig("./res/tagscore.json", &TagConfig)
|
|
|
qu.ReadConfig("./res/fieldscore.json", &SoreConfig)
|
|
|
- TitleScore = qu.Float64All(SoreConfig["extractype"]["title"])
|
|
|
if repeat, ok := SoreConfig["other"]["repeat"].(map[string]interface{}); ok {
|
|
|
RepeatScore = qu.Float64All(repeat["score"])
|
|
|
}
|
|
|
if block, ok := SoreConfig["other"]["block"].(map[string]interface{}); ok {
|
|
|
BlockScore = qu.Float64All(block["score"])
|
|
|
}
|
|
|
+ // Scoring configuration for common (generic) extraction attributes.
|
|
|
+ if tmp, ok := SoreConfig["extractype"]["common"].(map[string]interface{}); ok {
|
|
|
+ CommonScore = map[string]float64{}
|
|
|
+ for k, v := range tmp {
|
|
|
+ CommonScore[k] = qu.Float64All(v)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ log.Println(CommonScore)
|
|
|
+ // Scoring configuration for extraction attributes of specifically configured fields.
|
|
|
+ if tmp, ok := SoreConfig["extractype"]["fields"].(map[string]interface{}); ok {
|
|
|
+ FieldsScore = map[string]map[string]float64{}
|
|
|
+ for key, fieldmap := range tmp {
|
|
|
+ fieldscore := map[string]float64{}
|
|
|
+ if field, ok := fieldmap.(map[string]interface{}); ok {
|
|
|
+ for k, score := range field {
|
|
|
+ fieldscore[k] = qu.Float64All(score)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ FieldsScore[key] = fieldscore
|
|
|
+ }
|
|
|
+ }
|
|
|
+ log.Println(FieldsScore)
|
|
|
//实例化正则
|
|
|
for _, tmp := range SoreConfig {
|
|
|
//log.Println(tmp)
|
|
@@ -102,9 +125,10 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
result := j.Result
|
|
|
for field, tmps := range result {
|
|
|
for tmpsindex, tmpsvalue := range tmps {
|
|
|
+ describe := qu.ObjToString(SoreConfig["extractype"]["describe"])
|
|
|
if tmpsvalue.ExtFrom == "title" { //标题打分初始化
|
|
|
- tmps[tmpsindex].Score += TitleScore
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "title初始化", Code: "fieldscore.title", RuleText: qu.ObjToString(SoreConfig["extractype"]["describe"]), ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: TitleScore})
|
|
|
+ tmps[tmpsindex].Score += CommonScore["title"]
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "title初始化", Code: "fieldscore.title", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: CommonScore["title"]})
|
|
|
}
|
|
|
//是否有段标签
|
|
|
if len(tmpsvalue.BlockTag) > 0 {
|
|
@@ -125,23 +149,42 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
}
|
|
|
|
|
|
//抽取类型打分
|
|
|
- if strings.Contains(tmpsvalue.Type, "colon") {
|
|
|
- tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["colon"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "colonkv", Code: "fieldscore.colon", RuleText: qu.ObjToString(SoreConfig["extractype"]["describe"]), ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["colon"])})
|
|
|
- } else if strings.Contains(tmpsvalue.Type, "space") {
|
|
|
- tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["space"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "spacekv", Code: "fieldscore.space", RuleText: qu.ObjToString(SoreConfig["extractype"]["describe"]), ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["space"])})
|
|
|
- } else if strings.Contains(tmpsvalue.Type, "table") {
|
|
|
- tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["table"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "tablekv", Code: "fieldscore.table", RuleText: qu.ObjToString(SoreConfig["extractype"]["describe"]), ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["table"])})
|
|
|
- } else if strings.Contains(tmpsvalue.Type, "regexp") {
|
|
|
- tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["regexp"])
|
|
|
- tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: "fieldscore.regexp", RuleText: qu.ObjToString(SoreConfig["extractype"]["describe"]), ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["regexp"])})
|
|
|
+ if FieldsScore[field] != nil { //指定抽取属性打分配置
|
|
|
+ fieldscore := FieldsScore[field]
|
|
|
+ if strings.Contains(tmpsvalue.Type, "colon") {
|
|
|
+ tmps[tmpsindex].Score += fieldscore["colon"]
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "colonkv", Code: "fieldscore.colon", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: fieldscore["colon"]})
|
|
|
+ } else if strings.Contains(tmpsvalue.Type, "space") {
|
|
|
+ tmps[tmpsindex].Score += fieldscore["space"]
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "spacekv", Code: "fieldscore.space", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: fieldscore["space"]})
|
|
|
+ } else if strings.Contains(tmpsvalue.Type, "table") {
|
|
|
+ tmps[tmpsindex].Score += fieldscore["table"]
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "tablekv", Code: "fieldscore.table", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: fieldscore["table"]})
|
|
|
+ } else if strings.Contains(tmpsvalue.Type, "regexp") {
|
|
|
+ tmps[tmpsindex].Score += fieldscore["regexp"]
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: "fieldscore.regexp", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: fieldscore["regexp"]})
|
|
|
+ }
|
|
|
+ } else { //通用抽取属性打分配置
|
|
|
+ if strings.Contains(tmpsvalue.Type, "colon") {
|
|
|
+ tmps[tmpsindex].Score += CommonScore["colon"]
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "colonkv", Code: "fieldscore.colon", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: CommonScore["colon"]})
|
|
|
+ } else if strings.Contains(tmpsvalue.Type, "space") {
|
|
|
+ tmps[tmpsindex].Score += CommonScore["space"]
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "spacekv", Code: "fieldscore.space", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: CommonScore["space"]})
|
|
|
+ } else if strings.Contains(tmpsvalue.Type, "table") {
|
|
|
+ tmps[tmpsindex].Score += CommonScore["table"]
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "tablekv", Code: "fieldscore.table", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: CommonScore["table"]})
|
|
|
+ } else if strings.Contains(tmpsvalue.Type, "regexp") {
|
|
|
+ tmps[tmpsindex].Score += CommonScore["regexp"]
|
|
|
+ tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: "fieldscore.regexp", RuleText: describe, ScoreFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: CommonScore["regexp"]})
|
|
|
+ }
|
|
|
}
|
|
|
+
|
|
|
scoreRule := SoreConfig[field]
|
|
|
if scoreRule == nil {
|
|
|
continue
|
|
|
}
|
|
|
+ // Configuration-based scoring.
|
|
|
if scoreRule["type"] == "string" {
|
|
|
//1.长度打分
|
|
|
valueLen := utf8.RuneCountInString(fmt.Sprint(tmpsvalue.Value))
|
|
@@ -207,7 +250,7 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- //4.位置打分
|
|
|
+ // 4. Winning-bid candidate scoring.
|
|
|
if winnerorders, ok := scoreRule["winnerorder"].([]interface{}); ok {
|
|
|
for _, winnerorder := range winnerorders {
|
|
|
if p, ok := winnerorder.(map[string]interface{}); ok {
|