Ver Fonte

领域:标的物分类算法~优化~阈值选取

zhengkun há 2 anos atrás
pai
commit
be44f84a11

+ 0 - 3
fieldproject_medical/data_preparation/src/hospital/hospital_extra.go

@@ -82,14 +82,11 @@ func compareHospital() {
 					class.Save_Mgo.UpdateById(merge_hospital_coll_1, info_id, map[string]interface{}{
 						"$set": info_update,
 					})
-
 				}
 			}
 		}
-
 		tmp = make(map[string]interface{})
 	}
-
 	log.Debug("is over ", total, isok)
 }
 

+ 1 - 2
fieldproject_medical/data_preparation/src/main.go

@@ -4,7 +4,6 @@ import (
 	"class"
 	"flag"
 	log "github.com/donnie4w/go-logger/logger"
-	"hospital"
 	"net/http"
 	"time"
 )
@@ -17,7 +16,7 @@ func main() {
 	//处理医院
 	//hospital.RunHospital()
 	//暂时需要线上补充~数据
-	hospital.RunHospitalOnline()
+	//hospital.RunHospitalOnline()
 	//导入信息~医疗关联sql表
 	//hospital.ExportHospitalInfoToMysql()
 

+ 4 - 2
fieldproject_medical/data_service/src/bidding/bidding.go

@@ -26,7 +26,7 @@ func RunPurchasingInfo(gtid string, lteid string) {
 		},
 	}
 	it := sess.DB(ul.Mgo.DbName).C(ul.S_Bidding_Coll).Find(&q).Sort("_id").Select(fields).Iter()
-	pool := make(chan bool, 10)
+	pool := make(chan bool, 8)
 	wg := &sync.WaitGroup{}
 	total, isok := 0, 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
@@ -96,12 +96,14 @@ func createNewPurchasingInfo(p_list []map[string]interface{}) (bool, []map[strin
 	is_exists := false
 	new_plist := []map[string]interface{}{}
 	for _, v := range p_list {
+		//去重处理~名称~品牌~型号~暂无重复
+
 		data := map[string]interface{}{}
 		itemname := qu.ObjToString(v["itemname"])
 		if itemname == "" {
 			continue
 		}
-		//根据标的物名字~打上具体的分类数据~默认均有
+		//根据标的物名字~打上具体的分类数据~
 		is_yl, yl_code := confrimYlClassCode(itemname)
 
 		if !is_exists && is_yl {

+ 1 - 1
fieldproject_medical/data_service/src/bidding/clean.go

@@ -9,7 +9,7 @@ import (
 var arcReg = regexp.MustCompile("[((].*[))]")
 
 //异常词
-var abnormalReg = regexp.MustCompile("^((医疗|普通|医用)设备|[一二三]包)$")
+var abnormalReg = regexp.MustCompile("^((医疗|普通|医用)设备|[一二三四五六七八九A-Za-z1-9]包)$")
 
 var dataLock sync.Mutex
 

+ 31 - 26
fieldproject_medical/data_service/src/bidding/purchasing.go

@@ -12,13 +12,14 @@ import (
 func ConfrimTargetMedicalClass(name string) string {
 	//清洗~名称
 	name = cleanItemName(name)
+	if utf8.RuneCountInString(name) <= 2 {
+		return "" //过短数据不进行分类
+	}
 	//完全匹配校验
 	b, med_code := completeMatching(name)
 	if b {
-		//log.Debug("完全匹配:", name, "~", med_code)
 		return med_code
 	}
-
 	//找到所有~词组的集合
 	itemArr := ul.GSE.Cut(name, true)
 	indexDocs := map[int][]string{}
@@ -46,35 +47,39 @@ func calculateSimilarityScore(indexDocs map[int][]string, itemArr []string) stri
 		v_str := strings.Join(v, "")
 		//基础分计算
 		base_score := confrimBaseScore(v, itemArr)
-		//相似分计算
+		//近义词计算
 		dice_score := strsim.Compare(v_str, itemName, strsim.DiceCoefficient())
-
+		//优化空间~高分选取阈值~低分过滤阈值~综合阈值
 		finally_score := (base_score + dice_score) / 2
-		scoreDocs[k] = qu.FloatFormat(finally_score, 2)
-		//临时记录一下分数
-		scoreDocs_1[k] = qu.FloatFormat(base_score, 2)
-		scoreDocs_2[k] = qu.FloatFormat(dice_score, 2)
+		if finally_score > 0.55 && dice_score > 0.0 {
+			scoreDocs[k] = qu.FloatFormat(finally_score, 2)
+			//临时记录一下分数
+			scoreDocs_1[k] = qu.FloatFormat(base_score, 2)
+			scoreDocs_2[k] = qu.FloatFormat(dice_score, 2)
+		}
+	}
+	if len(scoreDocs) == 0 {
+		return ""
 	}
 	//取出最高有效分~数据
 	index, score := getMaxScore(scoreDocs)
-	match_str := ""
-	if score > 0.0 {
-		match_str = strings.Join(ul.NgrmDocIndex[index], "")
-	}
-
-	//分数~临界值校验
-	if score > 0.5 {
-		//log.Debug("相似匹配:", strings.Join(itemArr, ""), "~", match_str, "~", scoreDocs_1[index], "~", scoreDocs_2[index])
-		ul.Mgo.Save("zzzzzz", map[string]interface{}{
-			"name":       strings.Join(itemArr, ""),
-			"match_name": match_str,
-			"score":      score,
-			"score_1":    scoreDocs_1[index],
-			"score_2":    scoreDocs_2[index],
-		})
-		return ul.ProductDocText[match_str]
-	}
-	return ""
+	match_str := strings.Join(ul.NgrmDocIndex[index], "")
+	med_code := ul.ProductDocText[match_str]
+	//临时~测试保存数据
+	catalog := ul.CodeCatalog[med_code]
+	ul.Mgo.Save("zzzzzz", map[string]interface{}{
+		"name":       strings.Join(itemArr, ""),
+		"match_name": match_str,
+		"score":      score,
+		"score_1":    scoreDocs_1[index],
+		"score_2":    scoreDocs_2[index],
+		"code":       med_code,
+		"class_1":    catalog["class_1"],
+		"class_2":    catalog["class_2"],
+		"class_3":    catalog["class_3"],
+		"class_4":    catalog["class_4"],
+	})
+	return med_code
 }
 
 //计算基础分值

+ 2 - 5
fieldproject_medical/data_service/src/main.go

@@ -16,11 +16,8 @@ func init() {
 
 func main() {
 	log.Debug("run main ... ")
-
-	bidding.RunPurchasingInfo(
-		"100000000000000000000000",
-		"900000000000000000000000")
-
+	bidding.RunPurchasingInfo("100000000000000000000000", "900000000000000000000000")
+	return
 	//测试相似度计算
 	http.ListenAndServe(":9991", nil)
 	time.Sleep(999 * time.Hour)

+ 12 - 10
fieldproject_medical/data_service/src/util/initcfg.go

@@ -32,6 +32,8 @@ var (
 	Medical_Level_Class = map[int]map[string]string{}
 	//标准产品名~对应编号
 	ProductDocText = map[string]string{}
+	//编号~对应具体分类详细
+	CodeCatalog = map[string]map[string]string{}
 
 	isLocal bool
 )
@@ -46,23 +48,23 @@ func InitClass() {
 //初始化mgo
 func initMgo() {
 	if isLocal {
-		Mgo = &MongodbSim{
-			MongodbAddr: "127.0.0.1:27017",
-			DbName:      "zhengkun",
-			Size:        10,
-			UserName:    "",
-			Password:    "",
-		}
-		Mgo.InitPool()
-
 		//Mgo = &MongodbSim{
-		//	MongodbAddr: "192.168.3.207:27092",
+		//	MongodbAddr: "127.0.0.1:27017",
 		//	DbName:      "zhengkun",
 		//	Size:        10,
 		//	UserName:    "",
 		//	Password:    "",
 		//}
 		//Mgo.InitPool()
+
+		Mgo = &MongodbSim{
+			MongodbAddr: "192.168.3.207:27092",
+			DbName:      "zhengkun",
+			Size:        10,
+			UserName:    "",
+			Password:    "",
+		}
+		Mgo.InitPool()
 	} else {
 		Mgo = &MongodbSim{
 			MongodbAddr: "172.17.145.163:27083,172.17.4.187:27082",

+ 20 - 0
fieldproject_medical/data_service/src/util/initmed.go

@@ -1,6 +1,7 @@
 package util
 
 import (
+	"fmt"
 	log "github.com/donnie4w/go-logger/logger"
 	"github.com/go-ego/gse"
 	qu "qfw/util"
@@ -80,3 +81,22 @@ func loadNgrmCatalogData(datasArr []map[string]interface{}) {
 	}
 	log.Debug("键词数量:", len(NgrmText), "~", len(NgrmDocIndex), "~", len(ProductDocText))
 }
+
+// loadCodeCatalogData fills CodeCatalog: each record's code maps to the class_<level> names copied from its pcode entry plus its own name.
+func loadCodeCatalogData(datasArr []map[string]interface{}) {
+	// Results go into the CodeCatalog map; a record whose pcode has no entry stored yet copies nothing from it.
+	for _, v := range datasArr {
+		name := qu.ObjToString(v["name"])
+		code := qu.ObjToString(v["code"])
+		pcode := qu.ObjToString(v["pcode"])
+		level := qu.IntAll(v["level"])
+		info := map[string]string{}
+		for key, value := range CodeCatalog[pcode] { // copy into a fresh map so the pcode entry itself is not modified
+			info[key] = value
+		}
+		new_key := "class_" + fmt.Sprintf("%d", level) // key is "class_" followed by the record's level
+		info[new_key] = name
+		CodeCatalog[code] = info // replaces any entry previously stored under the same code
+	}
+	log.Debug("代码对应整体目录~", len(CodeCatalog))
+}

+ 2 - 0
fieldproject_medical/data_service/src/util/initvcode.go

@@ -85,4 +85,6 @@ func initMedicalLevelClass() {
 	}
 	log.Debug("医疗分类~加载~完毕~", len(Medical_Level_Class))
 	loadNgrmCatalogData(*data_class)
+	loadCodeCatalogData(*data_class)
+
 }