fengweiqiang 6 років тому
батько
коміт
8d8349da93
2 змінені файли: 22 додано та 19 видалено
  1. 21 18
      src/jy/extract/score.go
  2. 1 1
      src/jy/pretreated/analytable.go

+ 21 - 18
src/jy/extract/score.go

@@ -5,6 +5,7 @@ import (
 	"fmt"
 	ju "jy/util"
 	"log"
+	"qfw/common/src/qfw/util"
 	qu "qfw/util"
 	"regexp"
 	"strconv"
@@ -93,14 +94,16 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 			if len(tmpsvalue.BlockTag) > 0 {
 				//有标签段
 				var qz float64 = 0.0 //取权重最高的
+				var tgk string
 				for key := range tmpsvalue.BlockTag {
 					//key = "其他"//TODO 测试用
 					if TagConfig[key][field] > qz {
 						qz = TagConfig[key][field]
+						tgk = key
 					}
 				}
 				tmps[tmpsindex].Score += 2 * qz //乘以权重系数
-				tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "匹配段标签权重", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: 2 * qz})
+				tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "匹配段标签权重", Code: "权重系数乘以2", RuleText: "BlockTag", Type: tgk+field,  ExtFrom: "tagscore.json", Value: tmpsvalue.Value, Score: 2 * qz})
 			} else {
 				//没有段标签,走其他
 				//qz := TagConfig["其他"][field]
@@ -110,24 +113,24 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 				//是否有kv值
 				if strings.Contains(tmpsvalue.Type, "colon") {
 					tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["colon"])
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "colonkv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["colon"])})
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "colonkv", Code: "fieldscore.colon", RuleText: util.ObjToString(SoreConfig["extractype"]["describe"]), Type: "colonkv", ExtFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["colon"])})
 				} else if strings.Contains(tmpsvalue.Type, "space") {
 					tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["space"])
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "spacekv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["space"])})
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "spacekv", Code: "fieldscore.space", RuleText: util.ObjToString(SoreConfig["extractype"]["describe"]), Type: "spacekv", ExtFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["space"])})
 				} else if strings.Contains(tmpsvalue.Type, "table") {
 					tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["table"])
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "tablekv", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["table"])})
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "tablekv", Code: "fieldscore.table", RuleText: util.ObjToString(SoreConfig["extractype"]["describe"]), Type: "tablekv", ExtFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["table"])})
 				}
 			}
 			if tmpsvalue.ExtFrom != "title" { //非标题抽取
 				if strings.Contains(tmpsvalue.Type, "regexp") {
 					tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["regexp"])
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["regexp"])})
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: "fieldscore.regexp", RuleText:  util.ObjToString(SoreConfig["extractype"]["describe"]), Type: "regexp",  ExtFrom: "fieldscore.json", Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["regexp"])})
 				}
 			} else {
 				if strings.Contains(tmpsvalue.Type, "regexp") {
 					tmps[tmpsindex].Score += qu.Float64All(SoreConfig["extractype"]["regexp"])+ 1
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["regexp"])+ 1 })
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "regexp", Code: "fieldscore.regexp", RuleText:  util.ObjToString(SoreConfig["extractype"]["describe"]), Type: "regexp", ExtFrom:"fieldscore.json", Value: tmpsvalue.Value, Score: qu.Float64All(SoreConfig["extractype"]["regexp"])+ 1 })
 				}
 			}
 			scoreRule := SoreConfig[field]
@@ -142,7 +145,7 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 				}
 				if valueLen > 100 && field != "projectscope" {
 					tmps[tmpsindex].Score = -99
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: `valueLen > 100 && field != "projectscope"直接-99分`, Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: -99})
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: `valueLen > 100 && field != "projectscope"直接-99分`, Code: field, Type: "length", Value: tmpsvalue.Value, Score: -99})
 				}
 				if lengths, ok := scoreRule["length"].([]interface{}); ok {
 					for _, tmp := range lengths {
@@ -155,13 +158,13 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 							}
 							if valueLen < min {
 								tmps[tmpsindex].Score += qu.Float64All(scores[0])
-								tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[0])})
+								tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: fmt.Sprint(valueLen,"<",min), Type: field,  ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[0])})
 							} else if valueLen > max {
 								tmps[tmpsindex].Score += qu.Float64All(scores[2])
-								tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
+								tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: fmt.Sprint(valueLen,">",max), Type: field, ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
 							} else {
 								tmps[tmpsindex].Score += qu.Float64All(scores[1])
-								tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
+								tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "长度打分", Code: fmt.Sprint(valueLen,">",min,"&&",valueLen,"<",max), Type: field,  ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
 							}
 						}
 					}
@@ -175,7 +178,7 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 									reg := p["regexp"].(*regexp.Regexp)
 									if reg.MatchString(qu.ObjToString(tmpsvalue.Value)) {
 										tmps[tmpsindex].Score += qu.Float64All(p["score"])
-										tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "负面词打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(p["score"])})
+										tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "负面词打分"+fmt.Sprint(p["describe"]), Code:field+".negativewords" , RuleText: reg.String(), Type: "regexp",  ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(p["score"])})
 									}
 								}
 							}, func(err interface{}) {
@@ -193,7 +196,7 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 									reg := p["regexp"].(*regexp.Regexp)
 									if reg.MatchString(qu.ObjToString(tmpsvalue.Value)) {
 										tmps[tmpsindex].Score += qu.Float64All(p["score"])
-										tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "正面词打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(p["score"])})
+										tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "正面词打分"+fmt.Sprint(p["describe"]), Code: field+".positivewords", RuleText: reg.String(), Type: "regexp",  ExtFrom:  "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(p["score"])})
 									}
 								}
 							}, func(err interface{}) {
@@ -211,7 +214,7 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 									reg := p["regexp"].(*regexp.Regexp)
 									if reg.MatchString(qu.ObjToString(tmpsvalue.Value)) {
 										tmps[tmpsindex].Score += qu.Float64All(p["score"])
-										tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "winnerorder", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(p["score"])})
+										tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "位置打分winnerorder"+fmt.Sprint(p["describe"]), Code:  field+".winnerorder", RuleText:reg.String(), Type:  "regexp", ExtFrom:"fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(p["score"])})
 									}
 								}
 							}, func(err interface{}) {
@@ -232,13 +235,13 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 				}
 				if val < min && 0 < val {
 					tmps[tmpsindex].Score += qu.Float64All(scores[0])
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[0])})
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code:field+".float", RuleText: fmt.Sprint(val ,"<",min,"&&",0,"<",val),  ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[0])})
 				} else if val > max {
 					tmps[tmpsindex].Score += qu.Float64All(scores[2])
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code: field+".float", RuleText: fmt.Sprint(val,">",max),  ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
 				} else if val <= max && val >= min {
 					tmps[tmpsindex].Score += qu.Float64All(scores[1])
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code:field+".float", RuleText: fmt.Sprintln(val,"<=", max,"&&", val,">=", min ), ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
 				}
 			}
 			//其他打分配置
@@ -253,10 +256,10 @@ func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 				}
 				if val > max {
 					tmps[tmpsindex].Score += qu.Float64All(scores[2])
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "其他打分配置decimal", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "其他打分配置decimal", Code: field+".decimal", RuleText: fmt.Sprint(val ,">", max),  ExtFrom: "fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
 				} else if val <= max && val > min {
 					tmps[tmpsindex].Score += qu.Float64All(scores[1])
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "其他打分配置decimal", Code: tmpsvalue.Code, RuleText: tmpsvalue.RuleText, Type: tmpsvalue.Type, MatchType: tmpsvalue.MatchType, ExtFrom: tmpsvalue.ExtFrom, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "其他打分配置decimal", Code: field+".decimal", RuleText:fmt.Sprint(val ,"<=", max ,"&&", val,">", min), ExtFrom:"fieldscore.json."+field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
 				}
 			}
 		}

+ 1 - 1
src/jy/pretreated/analytable.go

@@ -109,7 +109,7 @@ var (
 	nswinnertabletag            = regexp.MustCompile("[评得分估]+")
 	projectcodeReg              = regexp.MustCompile(`((|\(|\[){1}(编号|项目编号|标段编号){1}(:|:)(.){4,30}()|\)|\])`)
 	projectcodeReg2             = regexp.MustCompile(`(编号|项目编号|标段编号){1}(:|:)(.){4,30}[0-9]`)
-	jsonReg						= regexp.MustCompile(`\{".*\":\".+\"}`)
+	jsonReg						= regexp.MustCompile(`\{.+:[^}]*\} `)//  \{".*\":\".+\"}
 )
 
 //在解析时,判断表格元素是否隐藏