|
@@ -12,13 +12,14 @@ import (
|
|
|
func ConfrimTargetMedicalClass(name string) string {
|
|
|
//清洗~名称
|
|
|
name = cleanItemName(name)
|
|
|
+ if utf8.RuneCountInString(name) <= 2 {
|
|
|
+ return "" //过短数据不进行分类
|
|
|
+ }
|
|
|
//完全匹配校验
|
|
|
b, med_code := completeMatching(name)
|
|
|
if b {
|
|
|
- //log.Debug("完全匹配:", name, "~", med_code)
|
|
|
return med_code
|
|
|
}
|
|
|
-
|
|
|
//找到所有~词组的集合
|
|
|
itemArr := ul.GSE.Cut(name, true)
|
|
|
indexDocs := map[int][]string{}
|
|
@@ -46,35 +47,39 @@ func calculateSimilarityScore(indexDocs map[int][]string, itemArr []string) stri
|
|
|
v_str := strings.Join(v, "")
|
|
|
//基础分计算
|
|
|
base_score := confrimBaseScore(v, itemArr)
|
|
|
- //相似分计算
|
|
|
+ //近义词计算
|
|
|
dice_score := strsim.Compare(v_str, itemName, strsim.DiceCoefficient())
|
|
|
-
|
|
|
+ //优化空间~高分选取阈值~低分过滤阈值~综合阈值
|
|
|
finally_score := (base_score + dice_score) / 2
|
|
|
- scoreDocs[k] = qu.FloatFormat(finally_score, 2)
|
|
|
- //临时记录一下分数
|
|
|
- scoreDocs_1[k] = qu.FloatFormat(base_score, 2)
|
|
|
- scoreDocs_2[k] = qu.FloatFormat(dice_score, 2)
|
|
|
+ if finally_score > 0.55 && dice_score > 0.0 {
|
|
|
+ scoreDocs[k] = qu.FloatFormat(finally_score, 2)
|
|
|
+ //临时记录一下分数
|
|
|
+ scoreDocs_1[k] = qu.FloatFormat(base_score, 2)
|
|
|
+ scoreDocs_2[k] = qu.FloatFormat(dice_score, 2)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if len(scoreDocs) == 0 {
|
|
|
+ return ""
|
|
|
}
|
|
|
//取出最高有效分~数据
|
|
|
index, score := getMaxScore(scoreDocs)
|
|
|
- match_str := ""
|
|
|
- if score > 0.0 {
|
|
|
- match_str = strings.Join(ul.NgrmDocIndex[index], "")
|
|
|
- }
|
|
|
-
|
|
|
- //分数~临界值校验
|
|
|
- if score > 0.5 {
|
|
|
- //log.Debug("相似匹配:", strings.Join(itemArr, ""), "~", match_str, "~", scoreDocs_1[index], "~", scoreDocs_2[index])
|
|
|
- ul.Mgo.Save("zzzzzz", map[string]interface{}{
|
|
|
- "name": strings.Join(itemArr, ""),
|
|
|
- "match_name": match_str,
|
|
|
- "score": score,
|
|
|
- "score_1": scoreDocs_1[index],
|
|
|
- "score_2": scoreDocs_2[index],
|
|
|
- })
|
|
|
- return ul.ProductDocText[match_str]
|
|
|
- }
|
|
|
- return ""
|
|
|
+ match_str := strings.Join(ul.NgrmDocIndex[index], "")
|
|
|
+ med_code := ul.ProductDocText[match_str]
|
|
|
+ //临时~测试保存数据
|
|
|
+ catalog := ul.CodeCatalog[med_code]
|
|
|
+ ul.Mgo.Save("zzzzzz", map[string]interface{}{
|
|
|
+ "name": strings.Join(itemArr, ""),
|
|
|
+ "match_name": match_str,
|
|
|
+ "score": score,
|
|
|
+ "score_1": scoreDocs_1[index],
|
|
|
+ "score_2": scoreDocs_2[index],
|
|
|
+ "code": med_code,
|
|
|
+ "class_1": catalog["class_1"],
|
|
|
+ "class_2": catalog["class_2"],
|
|
|
+ "class_3": catalog["class_3"],
|
|
|
+ "class_4": catalog["class_4"],
|
|
|
+ })
|
|
|
+ return med_code
|
|
|
}
|
|
|
|
|
|
//计算基础分值
|