Bladeren bron

备份~医院信息处理

zhengkun 3 jaren geleden
bovenliggende
commit
ecb7bb8ee3

+ 22 - 47
filedproject_medical/data_preparation/src/class/initdata.go

@@ -6,11 +6,12 @@ import (
 )
 
 var (
-	Save_Mgo, Spi_Mgo           *MongodbSim
-	TimeLayout                  = "2006-01-02 15:04:05"
-	TimeTmeplate                = "2006-01-02"
-	MysqlTool                   *Mysql
-	Medical_Type, Medical_Level map[string]string
+	Save_Mgo, Spi_Mgo *MongodbSim
+	TimeLayout        = "2006-01-02 15:04:05"
+	TimeTmeplate      = "2006-01-02"
+	MysqlTool         *Mysql
+	Medical_Type      = map[string]string{}
+	Medical_Level     = map[string]string{}
 
 	isLocal      bool
 	YXBK_DATA    = map[string]map[string]interface{}{} //医学百科相关数据
@@ -20,8 +21,8 @@ var (
 func InitClass() {
 	isLocal = true // local mode
 	initMgo()
-	//initMysql()
-	initClassCode() //加载相关代码表
+	initMysql() // NOTE(review): presumably sets up MysqlTool, which initVCode queries next — confirm
+	initVCode() // load the level/type code tables into Medical_Level / Medical_Type
 	//initLocalData()
 }
 
@@ -39,7 +40,7 @@ func initMgo() {
 
 		//Save_Mgo = &MongodbSim{
 		//	MongodbAddr: "192.168.3.207:27092",
-		//	DbName:      "majiajia",
+		//	DbName:      "zhengkun",
 		//	Size:        10,
 		//	UserName:    "",
 		//	Password:    "",
@@ -86,47 +87,21 @@ func initMysql() {
 
 }
 
-func initClassCode() {
-	//加载代码表~
-	Medical_Level = map[string]string{
-		"一级甲等": "01",
-		"一级乙等": "02",
-		"一级丙等": "03",
-		"一级其它": "04",
-		"二级甲等": "05",
-		"二级乙等": "06",
-		"二级丙等": "07",
-		"二级其它": "08",
-		"三级甲等": "09",
-		"三级乙等": "10",
-		"三级丙等": "11",
-		"三级特等": "12",
-		"三级其它": "13",
-		"其它":   "14",
+func initVCode() {
+	// load the "level" code table: every row's "name" column becomes a key of Medical_Level
+	data_level := MysqlTool.Find("code_medical_institution_level", nil, "name", "", -1, -1)
+	for _, v := range *data_level {
+		name := qu.ObjToString(v["name"])
+		Medical_Level[name] = name // key and value are both the name; the map is used as a lookup set
 	}
-
-	Medical_Type = map[string]string{
-		"综合医院":     "01",
-		"专科医院":     "02",
-		"中医医院":     "03",
-		"民族医院":     "04",
-		"康复医院":     "05",
-		"妇幼保健院":    "06",
-		"社区卫生服务中心": "07",
-		"乡镇卫生院":    "08",
-		"疗养院":      "09",
-		"门诊部":      "10",
-		"诊所及卫生所":   "11",
-		"村卫生室":     "12",
-		"急救中心":     "13",
-		"防治院":      "14",
-		"护理中心":     "15",
-		"检验诊断中心":   "16",
-		"美容整形":     "17",
-		"公司医院":     "18",
-		"校医院":      "19",
-		"其它":       "20",
+	log.Debug("医疗等级表~", len(Medical_Level))
+	// load the "type" code table the same way into Medical_Type
+	data_type := MysqlTool.Find("code_medical_institution_type", nil, "name", "", -1, -1)
+	for _, v := range *data_type {
+		name := qu.ObjToString(v["name"])
+		Medical_Type[name] = name // key and value are both the name; the map is used as a lookup set
 	}
+	log.Debug("医疗类型表~", len(Medical_Type))
 }
 
 //准备医学百科数据~

+ 13 - 3
filedproject_medical/data_preparation/src/hospital/hospital.go

@@ -1,5 +1,7 @@
 package hospital
 
+import "time"
+
 //开始执行医院数据
 func RunBuildHospitalInfo() {
 	//整合整体~医院信息
@@ -7,12 +9,20 @@ func RunBuildHospitalInfo() {
 	dealWithHospitalBaseInfo("f_hospital_hdf", "hospital_hdf_ain_depart", "好大夫")
 	dealWithHospitalBaseInfo("f_hospital_yydq", "hospital_yydq_ain_depart", "医院大全")
 	dealWithHospitalBaseInfo("f_hospital_yyyc", "", "医药英才网")
+	time.Sleep(10 * time.Second)
 }
 
 // RunCleanHospitalInfo runs the hospital data cleaning step.
 func RunCleanHospitalInfo() {
 	// clean the hospital records, then reset the duplicate markers
-	//cleanHospitalInfoData()
-
-	//resetRepeatHospital()
+	cleanHospitalInfoData()
+	time.Sleep(10 * time.Second) // NOTE(review): fixed 10s pause; presumably lets pending writes settle — confirm
+	resetRepeatHospital()
+	time.Sleep(10 * time.Second)
 }
+
+// RunMergeHospitalInfo merges the hospital records after duplicate detection by calling mergeRepeatHospital.
+func RunMergeHospitalInfo() {
+	mergeRepeatHospital()
+	time.Sleep(10 * time.Second) // NOTE(review): fixed 10s pause; presumably lets pending writes settle — confirm
+}

+ 195 - 63
filedproject_medical/data_preparation/src/hospital/hospital_clean.go

@@ -14,7 +14,7 @@ var cleanReg_1 = regexp.MustCompile("[((](民营|国营|自立)[))]")
 var suffixReg_1 = regexp.MustCompile("(院)(.*科)$")
 var suffixReg_2 = regexp.MustCompile("院[))]?")
 var suffixReg_3 = regexp.MustCompile("([((].*[))])$")
-var suffixReg_4 = regexp.MustCompile("[((]原[::]?(.*)[))]$")
+var suffixReg_4 = regexp.MustCompile("[((]原([::])?(.*)[))]$")
 var suffixReg_5 = regexp.MustCompile("(院|区|部)[))]$")
 
 //整合医院数据
@@ -24,36 +24,24 @@ func cleanHospitalInfoData() {
 	sess := class.Save_Mgo.GetMgoConn()
 	defer class.Save_Mgo.DestoryMongoConn(sess)
 	q := map[string]interface{}{}
-	it := sess.DB(class.Save_Mgo.DbName).C(save_coll).Find(&q).Sort("_id").Select(map[string]interface{}{
-		"name":  1,
-		"alias": 1,
-	}).Iter()
+	it := sess.DB(class.Save_Mgo.DbName).C(save_coll).Find(&q).Sort("_id").Iter()
 	total, isok := 0, 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%10000 == 0 {
+		if total%5000 == 0 {
 			log.Debug("cur index ", total, "~", isok)
 		}
-		old_name := qu.ObjToString(tmp["name"])
-		alias := qu.ObjToString(tmp["alias"])
-		b, new_name, his_name := cleanHospitalName(old_name)
+		//更新表
+		update := map[string]interface{}{}
 		tmpid := class.BsonTOStringId(tmp["_id"])
-		if b {
+		//清洗当前别名
+		cleanHospitalAlias(tmp, &update)
+		//清洗医院名称~涉及别名
+		cleanHospitalName(tmp, &update)
+		//清洗等级,类型,性质
+		cleanHospitalLevelTypes(tmp, &update)
+
+		if len(update) > 0 {
 			isok++
-			update := map[string]interface{}{
-				"name":     new_name,
-				"old_name": old_name,
-			}
-			new_alias := alias
-			if his_name != "" {
-				if new_alias == "" {
-					new_alias = his_name
-				} else {
-					if !strings.Contains(new_alias, his_name) {
-						new_alias = new_alias + "," + his_name
-					}
-				}
-				update["alias"] = new_alias
-			}
 			class.Save_Mgo.UpdateById(save_coll, tmpid, map[string]interface{}{
 				"$set": update,
 			})
@@ -63,50 +51,152 @@ func cleanHospitalInfoData() {
 
 	log.Debug("清洗医院信息~~over~~", total, "~", isok)
 
-	resetRepeatHospital()
 }
 
-//清洗医院名称
-func cleanHospitalName(name string) (bool, string, string) {
+// cleanHospitalAlias runs standardname over each comma-separated entry of tmp["alias"]; if any entry changed it stores the rejoined list in update["alias"] and the original string in update["old_alias"].
+func cleanHospitalAlias(tmp map[string]interface{}, update *map[string]interface{}) {
+	alias := qu.ObjToString(tmp["alias"])
 	is_clean := false
-	new_name := name
-	his_name := ""
-	//
-	if cleanReg_1.MatchString(name) {
-		new_name = cleanReg_1.ReplaceAllString(name, "")
-		name = new_name
-		is_clean = true
+	if alias != "" {
+		new_arr := []string{}
+		arr := strings.Split(alias, ",")
+		for _, v := range arr {
+			b, new_v, _ := standardname(v)
+			if b {
+				is_clean = true // sticky: one cleaned alias is enough to write the update, even if later ones are unchanged
+				new_arr = append(new_arr, new_v)
+			} else {
+				new_arr = append(new_arr, v)
+			}
+		}
+		if is_clean {
+			new_alias := strings.Join(new_arr, ",")
+			(*update)["alias"] = new_alias
+			(*update)["old_alias"] = alias
+		}
 	}
-	//清洗 XX科室
-	if suffixReg_1.MatchString(name) { //需要采用截取的方式
-		index_arr := suffixReg_2.FindAllStringIndex(name, -1)
-		last := index_arr[len(index_arr)-1]
-		last_index := last[len(last)-1]
-		new_name = name[:last_index]
-		is_clean = true
+}
+
+// cleanHospitalName runs standardname over tmp["name"]; when the name changed it records name/old_name in update and appends any extracted former name to the alias list.
+func cleanHospitalName(tmp map[string]interface{}, update *map[string]interface{}) {
+	name := qu.ObjToString(tmp["name"])
+	is_clean, new_name, his_name := standardname(name)
+	// only write an update when standardname actually changed something
+	if is_clean {
+		(*update)["name"] = new_name
+		(*update)["old_name"] = name
+		if his_name != "" { // a former name was extracted from the "(原...)" suffix
+			new_alias := ""
+			cur_alias := qu.ObjToString(tmp["alias"])
+			if (*update)["alias"] != nil { // prefer an alias value already placed in update over the stored one
+				cur_alias = qu.ObjToString((*update)["alias"])
+			}
+			if cur_alias == "" {
+				new_alias = his_name
+			} else {
+				new_alias = cur_alias + "," + his_name // appended without checking whether cur_alias already contains it
+			}
+			(*update)["alias"] = qu.ObjToString(new_alias)
+			if (*update)["old_alias"] == nil { // keep an old_alias that is already present in update
+				(*update)["old_alias"] = qu.ObjToString(cur_alias)
+			}
+		}
 	}
+}
 
-	//清洗多余结尾~路标地址等
-	if suffixReg_3.MatchString(name) {
-		index_arr := suffixReg_3.FindAllStringIndex(name, -1)
-		last := index_arr[len(index_arr)-1]
-		strat_index := last[0]
-		last_index := last[len(last)-1]
-		suffix_name := name[strat_index:last_index]
-		if suffixReg_4.MatchString(suffix_name) {
-			his_name = suffixReg_4.ReplaceAllString(suffix_name, "${1}")
-			new_name = suffixReg_4.ReplaceAllString(name, "")
-			is_clean = true
+// cleanHospitalLevelTypes normalizes tmp["level"], tmp["type"] and tmp["business_type"]; each value that differs from its normalized form is written to update together with an old_* copy of the original.
+func cleanHospitalLevelTypes(tmp map[string]interface{}, update *map[string]interface{}) {
+	med_level := qu.ObjToString(tmp["level"])
+	med_type := qu.ObjToString(tmp["type"])
+	med_bus_type := qu.ObjToString(tmp["business_type"])
+	new_level, new_type, new_bus_type := "其它", "其它", "其它"
+	// derive ownership (and possibly type) from business_type
+	if med_bus_type != "" {
+		new_bus_type = relevanceBusType(med_bus_type)
+		arr := strings.Split(med_bus_type, "/")
+		if len(arr) == 2 { // "x/y" form: the part after the slash is looked up as the type
+			new_type = relevanceType(arr[1])
 		}
-		if utf8.RuneCountInString(suffix_name) == 4 {
-			if !suffixReg_5.MatchString(suffix_name) {
-				new_name = strings.ReplaceAll(name, suffix_name, "")
-				is_clean = true
-			}
+	}
+
+	// derive the level from the level field
+	if med_level != "" {
+		new_level = relevanceLevel(med_level)
+		if new_bus_type == "其它" { // NOTE(review): re-runs the same lookup on med_bus_type, so the result cannot change; possibly med_level was intended — confirm
+			new_bus_type = relevanceBusType(med_bus_type)
 		}
 	}
 
-	return is_clean, new_name, his_name
+	// fall back to the type field when no type was resolved above; the result also feeds the level/ownership fallbacks
+	if med_type != "" && new_type == "其它" {
+		new_type = relevanceType(med_type)
+		if new_level == "其它" {
+			new_level = relevanceLevel(new_type)
+		}
+		if new_bus_type == "其它" {
+			new_bus_type = relevanceBusType(new_type)
+		}
+	}
+
+	if med_level != new_level {
+		(*update)["level"] = new_level
+		(*update)["old_level"] = med_level
+	}
+	if med_type != new_type {
+		(*update)["type"] = new_type
+		(*update)["old_type"] = med_type
+	}
+	if med_bus_type != new_bus_type {
+		(*update)["business_type"] = new_bus_type
+		(*update)["old_business_type"] = med_bus_type
+	}
+
+}
+
+// relevanceLevel returns med_level (after shorthand expansion if needed) when it is a key of class.Medical_Level, otherwise "其它".
+func relevanceLevel(med_level string) string {
+	new_level := "其它"
+	if class.Medical_Level[med_level] != "" {
+		new_level = med_level
+	} else { // not a known level: try to expand shorthand spellings
+		if med_lev_Reg1.MatchString(med_level) {
+			med_level = med_lev_Reg1.ReplaceAllString(med_level, "${1}级${2}等") // e.g. "三甲" becomes "三级甲等"
+		}
+		if med_lev_Reg2.MatchString(med_level) {
+			med_level = med_lev_Reg2.ReplaceAllString(med_level, "${1}${2}其它") // e.g. "二级" or "二级医院" becomes "二级其它"
+		}
+	}
+	if class.Medical_Level[med_level] != "" { // second lookup covers the rewritten spelling
+		new_level = med_level
+	}
+	return new_level
+}
+
+// relevanceType returns med_type when it is a key of class.Medical_Type, otherwise "其它".
+func relevanceType(med_type string) string {
+	new_type := ""
+	// placeholder for future rules (e.g. containment matching); only an exact lookup is done today
+	if class.Medical_Type[med_type] != "" {
+		new_type = med_type
+	} else {
+		new_type = "其它"
+	}
+	return new_type
+}
+
+// relevanceBusType classifies an ownership description by substring and returns "公立", "民营" or "其它".
+func relevanceBusType(med_bus_type string) string {
+	new_bus_type := "其它"
+	// returns text labels, not numeric codes; the 民营 check runs last and wins if both kinds of keyword are present
+	if strings.Contains(med_bus_type, "公立") ||
+		strings.Contains(med_bus_type, "国营") {
+		new_bus_type = "公立"
+	}
+	if strings.Contains(med_bus_type, "民营") ||
+		strings.Contains(med_bus_type, "个体户") {
+		new_bus_type = "民营"
+	}
+	return new_bus_type
 }
 
 //重置重复标记~
@@ -119,10 +209,10 @@ func resetRepeatHospital() {
 	it := sess.DB(class.Save_Mgo.DbName).C(save_coll).Find(&q).Sort("_id").Select(map[string]interface{}{
 		"name": 1,
 	}).Iter()
-	total := 0
+	total, isok := 0, 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%10000 == 0 {
-			log.Debug("cur index ", total)
+		if total%5000 == 0 {
+			log.Debug("cur index ", total, "~", isok)
 		}
 		name := qu.ObjToString(tmp["name"])
 		tmpid := class.BsonTOStringId(tmp["_id"])
@@ -134,6 +224,7 @@ func resetRepeatHospital() {
 				"repeat_id": "",
 			}
 		} else {
+			isok++
 			update = map[string]interface{}{
 				"repeat":    1,
 				"repeat_id": data_hospitals[name],
@@ -144,6 +235,47 @@ func resetRepeatHospital() {
 		})
 		tmp = make(map[string]interface{})
 	}
-	log.Debug("重置~~完毕~~over~~")
+	log.Debug("重置~~完毕~~over~~", total, "~", isok)
+
+}
+
+// standardname normalizes one hospital name and returns (changed, normalized name, former name taken from a trailing "(原...)" suffix or "").
+func standardname(s_name string) (bool, string, string) {
+	// working state: nothing cleaned yet, name unchanged, no former name
+	is_clean := false
+	new_name := s_name
+	his_name := ""
 
+	if cleanReg_1.MatchString(new_name) {
+		new_name = cleanReg_1.ReplaceAllString(new_name, "")
+		is_clean = true
+	}
+	// strip a trailing department ("...科") that follows the hospital name
+	if suffixReg_1.MatchString(new_name) { // done by truncating at an index rather than by replacement
+		index_arr := suffixReg_2.FindAllStringIndex(new_name, -1)
+		last := index_arr[len(index_arr)-1]
+		last_index := last[len(last)-1]
+		new_name = new_name[:last_index] // keep everything up to the end of the last suffixReg_2 match
+		is_clean = true
+	}
+	// strip a trailing parenthesized suffix (per the original note: landmarks, addresses and the like)
+	if suffixReg_3.MatchString(new_name) {
+		index_arr := suffixReg_3.FindAllStringIndex(new_name, -1)
+		last := index_arr[len(index_arr)-1]
+		strat_index := last[0]
+		last_index := last[len(last)-1]
+		suffix_name := new_name[strat_index:last_index]
+		if suffixReg_4.MatchString(suffix_name) {
+			his_name = suffixReg_4.ReplaceAllString(suffix_name, "${2}") // group 2 is the text after "原" and the optional colon
+			new_name = suffixReg_4.ReplaceAllString(new_name, "")
+			is_clean = true
+		}
+		if utf8.RuneCountInString(suffix_name) == 4 { // the whole parenthesized suffix is exactly four runes long
+			if !suffixReg_5.MatchString(suffix_name) { // keep suffixes whose text ends in 院/区/部
+				new_name = strings.ReplaceAll(new_name, suffix_name, "")
+				is_clean = true
+			}
+		}
+	}
+	return is_clean, new_name, his_name
 }

+ 9 - 9
filedproject_medical/data_preparation/src/hospital/hospital_info.go

@@ -3,6 +3,7 @@ package hospital
 import (
 	"class"
 	log "github.com/donnie4w/go-logger/logger"
+	"github.com/uuid"
 	qu "qfw/util"
 	"strings"
 	"sync"
@@ -92,21 +93,20 @@ func treatHospitalInfo(tmp map[string]interface{}) map[string]interface{} {
 	city := qu.ObjToString(tmp["city"])
 	district := qu.ObjToString(tmp["district"])
 	if city == "" { //补充~省份城市信息
-		//supplementRegionally(&area, &city, &district, hospitalname, qu.ObjToString(tmp["address"]))
+		supplementRegionally(&area, &city, &district, hospitalname, qu.ObjToString(tmp["address"]))
 	}
 	data["area"] = area
 	data["area"] = city
 	data["area"] = district
 
-	company_id := ""
-	//关联企业信息~查询顺序~企业~特殊~自生
-	//company_id = inquirBaseInfoid(hospitalname)
-	//if company_id == "" {
-	//	company_id = uuid.New().String()
-	//	//新增一个信息来源
-	//}
+	company_id, mark_id := "", 1
+	company_id = inquirBaseInfoid(hospitalname)
+	if company_id == "" {
+		company_id = uuid.New().String()
+		mark_id = 0
+	}
 	data["company_id"] = company_id
-
+	data["mark_id"] = mark_id
 	return data
 }
 

+ 1 - 0
filedproject_medical/data_preparation/src/hospital/hospital_merge.go

@@ -6,6 +6,7 @@ import (
 	qu "qfw/util"
 )
 
+
 //最终合并判重后的医院数据
 func mergeRepeatHospital() {
 	log.Debug("开始合并重复后的数据~~~")

+ 0 - 88
filedproject_medical/data_preparation/src/hospital/hospital_method.go

@@ -3,7 +3,6 @@ package hospital
 import (
 	"class"
 	"encoding/json"
-	log "github.com/donnie4w/go-logger/logger"
 	"go.mongodb.org/mongo-driver/bson/primitive"
 	"io/ioutil"
 	"net/http"
@@ -15,93 +14,6 @@ import (
 var med_lev_Reg1 = regexp.MustCompile("^([一二三])([甲乙丙特])$")
 var med_lev_Reg2 = regexp.MustCompile("^([一二三])(级)(医院)?$")
 
-//类型 等级 性质  ~ 综合清洗分析验证
-func inquirHospitalLevelType(med_level string, med_type string, med_bus_type string) (string, string, string) {
-	new_level, new_type, new_bus_type := "", "", ""
-	//医疗性质相关
-	if med_bus_type != "" {
-		new_bus_type = relevanceBusType(med_bus_type)
-		//针对~类型划分
-		arr := strings.Split(med_bus_type, "/")
-		if len(arr) == 2 {
-			new_type = relevanceType(arr[1])
-		}
-	}
-	//医疗等级相关
-	if med_level != "" {
-		new_level = relevanceLevel(med_level)
-		//针对~性质划分
-		new_bus_type = relevanceBusType(med_bus_type)
-	}
-
-	//医疗类型相关~采集异常~需要清洗
-	if med_type != "" && new_type == "" {
-
-		//医疗类型
-		new_type = relevanceType(med_type)
-
-		if new_level == "" {
-			new_level = relevanceLevel(new_type)
-		}
-		if new_bus_type == "" {
-			new_bus_type = relevanceBusType(new_type)
-		}
-	}
-
-	//最终核对
-	if new_level == "" {
-		new_level = class.Medical_Level["其它"]
-	}
-	if new_type == "" {
-		new_type = class.Medical_Type["其它"]
-		log.Debug(new_type)
-	}
-	if new_bus_type == "" {
-		new_bus_type = "3"
-	}
-	return new_level, new_type, new_bus_type
-}
-
-// level 医疗等级对比
-func relevanceLevel(med_level string) string {
-	new_level := ""
-	if class.Medical_Level[med_level] != "" {
-		new_level = class.Medical_Level[med_level]
-	} else { //特殊描述映射关系
-		if med_lev_Reg1.MatchString(med_level) {
-			med_level = med_lev_Reg1.ReplaceAllString(med_level, "${1}级${2}等")
-			new_level = class.Medical_Level[med_level]
-		}
-		if med_lev_Reg2.MatchString(med_level) {
-			med_level = med_lev_Reg2.ReplaceAllString(med_level, "${1}${2}其它")
-			new_level = class.Medical_Level[med_level]
-		}
-	}
-	return new_level
-}
-
-//type 类型划分
-func relevanceType(med_type string) string {
-	new_type := ""
-	//扩展~包含关系~等等规则
-	new_type = class.Medical_Type[med_type]
-	return new_type
-}
-
-//bus_type 性质划分
-func relevanceBusType(med_bus_type string) string {
-	new_bus_type := ""
-	//0公立、1民营、2其它
-	if strings.Contains(med_bus_type, "公立") {
-		new_bus_type = "1"
-	}
-	if strings.Contains(med_bus_type, "民营") ||
-		strings.Contains(med_bus_type, "个体户") {
-		new_bus_type = "2"
-	}
-	return new_bus_type
-}
-
 //查询企业id
 func inquirBaseInfoid(name string) (company_id string) {
 	company_id = ""

+ 6 - 14
filedproject_medical/data_preparation/src/hospital/hospital_sql.go

@@ -35,11 +35,13 @@ func exportHospitalInfoToMysql() {
 			}()
 
 			data := tmp
+			//线上版~可以直接查询表
+
 			//等级~性质~代码
-			n_level, n_type, n_bus_type := inquirHospitalLevelType(qu.ObjToString(tmp["level"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["business_type"]))
-			data["level_code"] = n_level
-			data["mi_type_code"] = n_type
-			data["mi_business_type"] = qu.IntAll(n_bus_type)
+			//n_level, n_type, n_bus_type := clenaHospitalLevelType(qu.ObjToString(tmp["level"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["business_type"]))
+			//data["level_code"] = n_level
+			//data["mi_type_code"] = n_type
+			//data["mi_business_type"] = qu.IntAll(n_bus_type)
 
 			//行政区划代码~匹配
 			region_code := regionallyCode(qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["city"]), qu.ObjToString(tmp["district"]))
@@ -49,16 +51,6 @@ func exportHospitalInfoToMysql() {
 			data["comeintime"] = time.Unix(time.Now().Unix(), 0).Format(class.TimeTmeplate)
 			data["updatetime"] = time.Unix(time.Now().Unix(), 0).Format(class.TimeTmeplate)
 
-			delete(data, "_id")
-			delete(data, "area")
-			delete(data, "city")
-			delete(data, "district")
-			delete(data, "level")
-			delete(data, "type")
-			delete(data, "business_type")
-			delete(data, "departs")
-			delete(data, "alias")
-
 		}(tmp)
 		tmp = make(map[string]interface{})
 	}

+ 2 - 2
filedproject_medical/data_preparation/src/main.go

@@ -14,9 +14,9 @@ func init() {
 }
 func main() {
 	log.Debug("医疗信息相关~~准备~~")
-	//hospital.RunBuildHospitalInfo()
+	hospital.RunBuildHospitalInfo() // step 1: build the hospital records
 	hospital.RunCleanHospitalInfo()
-
+	hospital.RunMergeHospitalInfo() // step 3: merge after the cleaning step above
 	time.Sleep(999 * time.Hour) // blocks main for a long time; NOTE(review): presumably keeps background work alive — confirm
 }
 

+ 0 - 1
filedproject_medical/data_preparation/src/product/product.go

@@ -39,7 +39,6 @@ func dealWithMedicalProductInfo() {
 
 			treatProductInfo(tmp)
 			//插入sql
-			
 
 		}(tmp)
 		tmp = make(map[string]interface{})

File diff suppressed because it is too large
+ 95 - 0
filedproject_medical/data_preparation/src/vcode/vcode.go


Some files were not shown because too many files changed in this diff