|
@@ -14,7 +14,7 @@ var cleanReg_1 = regexp.MustCompile("[((](民营|国营|自立)[))]")
|
|
|
var suffixReg_1 = regexp.MustCompile("(院)(.*科)$")
|
|
|
var suffixReg_2 = regexp.MustCompile("院[))]?")
|
|
|
var suffixReg_3 = regexp.MustCompile("([((].*[))])$")
|
|
|
-var suffixReg_4 = regexp.MustCompile("[((]原[::]?(.*)[))]$")
|
|
|
+var suffixReg_4 = regexp.MustCompile("[((]原([::])?(.*)[))]$")
|
|
|
var suffixReg_5 = regexp.MustCompile("(院|区|部)[))]$")
|
|
|
|
|
|
//整合医院数据
|
|
@@ -24,36 +24,24 @@ func cleanHospitalInfoData() {
|
|
|
sess := class.Save_Mgo.GetMgoConn()
|
|
|
defer class.Save_Mgo.DestoryMongoConn(sess)
|
|
|
q := map[string]interface{}{}
|
|
|
- it := sess.DB(class.Save_Mgo.DbName).C(save_coll).Find(&q).Sort("_id").Select(map[string]interface{}{
|
|
|
- "name": 1,
|
|
|
- "alias": 1,
|
|
|
- }).Iter()
|
|
|
+ it := sess.DB(class.Save_Mgo.DbName).C(save_coll).Find(&q).Sort("_id").Iter()
|
|
|
total, isok := 0, 0
|
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
- if total%10000 == 0 {
|
|
|
+ if total%5000 == 0 {
|
|
|
log.Debug("cur index ", total, "~", isok)
|
|
|
}
|
|
|
- old_name := qu.ObjToString(tmp["name"])
|
|
|
- alias := qu.ObjToString(tmp["alias"])
|
|
|
- b, new_name, his_name := cleanHospitalName(old_name)
|
|
|
+ //更新表
|
|
|
+ update := map[string]interface{}{}
|
|
|
tmpid := class.BsonTOStringId(tmp["_id"])
|
|
|
- if b {
|
|
|
+ //清洗当前别名
|
|
|
+ cleanHospitalAlias(tmp, &update)
|
|
|
+ //清洗医院名称~涉及别名
|
|
|
+ cleanHospitalName(tmp, &update)
|
|
|
+ //清洗等级,类型,性质
|
|
|
+ cleanHospitalLevelTypes(tmp, &update)
|
|
|
+
|
|
|
+ if len(update) > 0 {
|
|
|
isok++
|
|
|
- update := map[string]interface{}{
|
|
|
- "name": new_name,
|
|
|
- "old_name": old_name,
|
|
|
- }
|
|
|
- new_alias := alias
|
|
|
- if his_name != "" {
|
|
|
- if new_alias == "" {
|
|
|
- new_alias = his_name
|
|
|
- } else {
|
|
|
- if !strings.Contains(new_alias, his_name) {
|
|
|
- new_alias = new_alias + "," + his_name
|
|
|
- }
|
|
|
- }
|
|
|
- update["alias"] = new_alias
|
|
|
- }
|
|
|
class.Save_Mgo.UpdateById(save_coll, tmpid, map[string]interface{}{
|
|
|
"$set": update,
|
|
|
})
|
|
@@ -63,50 +51,152 @@ func cleanHospitalInfoData() {
|
|
|
|
|
|
log.Debug("清洗医院信息~~over~~", total, "~", isok)
|
|
|
|
|
|
- resetRepeatHospital()
|
|
|
}
|
|
|
|
|
|
-//清洗医院名称
|
|
|
-func cleanHospitalName(name string) (bool, string, string) {
|
|
|
+//清洗医院别名
|
|
|
+func cleanHospitalAlias(tmp map[string]interface{}, update *map[string]interface{}) {
|
|
|
+ alias := qu.ObjToString(tmp["alias"])
|
|
|
is_clean := false
|
|
|
- new_name := name
|
|
|
- his_name := ""
|
|
|
- //
|
|
|
- if cleanReg_1.MatchString(name) {
|
|
|
- new_name = cleanReg_1.ReplaceAllString(name, "")
|
|
|
- name = new_name
|
|
|
- is_clean = true
|
|
|
+ if alias != "" {
|
|
|
+ new_arr := []string{}
|
|
|
+ arr := strings.Split(alias, ",")
|
|
|
+ for _, v := range arr {
|
|
|
+ b, new_v, _ := standardname(v)
|
|
|
+ is_clean = b
|
|
|
+ if b {
|
|
|
+ new_arr = append(new_arr, new_v)
|
|
|
+ } else {
|
|
|
+ new_arr = append(new_arr, v)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if is_clean {
|
|
|
+ new_alias := strings.Join(new_arr, ",")
|
|
|
+ (*update)["alias"] = new_alias
|
|
|
+ (*update)["old_alias"] = alias
|
|
|
+ }
|
|
|
}
|
|
|
- //清洗 XX科室
|
|
|
- if suffixReg_1.MatchString(name) { //需要采用截取的方式
|
|
|
- index_arr := suffixReg_2.FindAllStringIndex(name, -1)
|
|
|
- last := index_arr[len(index_arr)-1]
|
|
|
- last_index := last[len(last)-1]
|
|
|
- new_name = name[:last_index]
|
|
|
- is_clean = true
|
|
|
+}
|
|
|
+
|
|
|
+//清洗名称~
|
|
|
+func cleanHospitalName(tmp map[string]interface{}, update *map[string]interface{}) {
|
|
|
+ name := qu.ObjToString(tmp["name"])
|
|
|
+ is_clean, new_name, his_name := standardname(name)
|
|
|
+ //是否更新
|
|
|
+ if is_clean {
|
|
|
+ (*update)["name"] = new_name
|
|
|
+ (*update)["old_name"] = name
|
|
|
+ if his_name != "" {
|
|
|
+ new_alias := ""
|
|
|
+ cur_alias := qu.ObjToString(tmp["alias"])
|
|
|
+ if (*update)["alias"] != nil {
|
|
|
+ cur_alias = qu.ObjToString((*update)["alias"])
|
|
|
+ }
|
|
|
+ if cur_alias == "" {
|
|
|
+ new_alias = his_name
|
|
|
+ } else {
|
|
|
+ new_alias = cur_alias + "," + his_name
|
|
|
+ }
|
|
|
+ (*update)["alias"] = qu.ObjToString(new_alias)
|
|
|
+ if (*update)["old_alias"] == nil {
|
|
|
+ (*update)["old_alias"] = qu.ObjToString(cur_alias)
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
+}
|
|
|
|
|
|
- //清洗多余结尾~路标地址等
|
|
|
- if suffixReg_3.MatchString(name) {
|
|
|
- index_arr := suffixReg_3.FindAllStringIndex(name, -1)
|
|
|
- last := index_arr[len(index_arr)-1]
|
|
|
- strat_index := last[0]
|
|
|
- last_index := last[len(last)-1]
|
|
|
- suffix_name := name[strat_index:last_index]
|
|
|
- if suffixReg_4.MatchString(suffix_name) {
|
|
|
- his_name = suffixReg_4.ReplaceAllString(suffix_name, "${1}")
|
|
|
- new_name = suffixReg_4.ReplaceAllString(name, "")
|
|
|
- is_clean = true
|
|
|
+//清洗等级医院以及类型
|
|
|
+func cleanHospitalLevelTypes(tmp map[string]interface{}, update *map[string]interface{}) {
|
|
|
+ med_level := qu.ObjToString(tmp["level"])
|
|
|
+ med_type := qu.ObjToString(tmp["type"])
|
|
|
+ med_bus_type := qu.ObjToString(tmp["business_type"])
|
|
|
+ new_level, new_type, new_bus_type := "其它", "其它", "其它"
|
|
|
+ //医疗性质相关
|
|
|
+ if med_bus_type != "" {
|
|
|
+ new_bus_type = relevanceBusType(med_bus_type)
|
|
|
+ arr := strings.Split(med_bus_type, "/")
|
|
|
+ if len(arr) == 2 { //针对~类型
|
|
|
+ new_type = relevanceType(arr[1])
|
|
|
}
|
|
|
- if utf8.RuneCountInString(suffix_name) == 4 {
|
|
|
- if !suffixReg_5.MatchString(suffix_name) {
|
|
|
- new_name = strings.ReplaceAll(name, suffix_name, "")
|
|
|
- is_clean = true
|
|
|
- }
|
|
|
+ }
|
|
|
+
|
|
|
+ //医疗等级相关
|
|
|
+ if med_level != "" {
|
|
|
+ new_level = relevanceLevel(med_level)
|
|
|
+ if new_bus_type == "其它" { //针对~性质
|
|
|
+ new_bus_type = relevanceBusType(med_bus_type)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- return is_clean, new_name, his_name
|
|
|
+ //医疗类型相关~采集异常~需要清洗
|
|
|
+ if med_type != "" && new_type == "其它" {
|
|
|
+ new_type = relevanceType(med_type)
|
|
|
+ if new_level == "其它" {
|
|
|
+ new_level = relevanceLevel(new_type)
|
|
|
+ }
|
|
|
+ if new_bus_type == "其它" {
|
|
|
+ new_bus_type = relevanceBusType(new_type)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if med_level != new_level {
|
|
|
+ (*update)["level"] = new_level
|
|
|
+ (*update)["old_level"] = med_level
|
|
|
+ }
|
|
|
+ if med_type != new_type {
|
|
|
+ (*update)["type"] = new_type
|
|
|
+ (*update)["old_type"] = med_type
|
|
|
+ }
|
|
|
+ if med_bus_type != new_bus_type {
|
|
|
+ (*update)["business_type"] = new_bus_type
|
|
|
+ (*update)["old_business_type"] = med_bus_type
|
|
|
+ }
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+// level 医疗等级对比
|
|
|
+func relevanceLevel(med_level string) string {
|
|
|
+ new_level := "其它"
|
|
|
+ if class.Medical_Level[med_level] != "" {
|
|
|
+ new_level = med_level
|
|
|
+ } else { //特殊描述映射关系~
|
|
|
+ if med_lev_Reg1.MatchString(med_level) {
|
|
|
+ med_level = med_lev_Reg1.ReplaceAllString(med_level, "${1}级${2}等")
|
|
|
+ }
|
|
|
+ if med_lev_Reg2.MatchString(med_level) {
|
|
|
+ med_level = med_lev_Reg2.ReplaceAllString(med_level, "${1}${2}其它")
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if class.Medical_Level[med_level] != "" {
|
|
|
+ new_level = med_level
|
|
|
+ }
|
|
|
+ return new_level
|
|
|
+}
|
|
|
+
|
|
|
+//type 类型划分
|
|
|
+func relevanceType(med_type string) string {
|
|
|
+ new_type := ""
|
|
|
+ //扩展~包含关系~等等规则
|
|
|
+ if class.Medical_Type[med_type] != "" {
|
|
|
+ new_type = med_type
|
|
|
+ } else {
|
|
|
+ new_type = "其它"
|
|
|
+ }
|
|
|
+ return new_type
|
|
|
+}
|
|
|
+
|
|
|
// relevanceBusType classifies a free-text business-nature description as
// "公立", "民营" or "其它".
//
// Text containing "民营" or "个体户" yields "民营"; this takes precedence over
// text containing "公立" or "国营", which yields "公立". Anything else,
// including the empty string, yields "其它".
func relevanceBusType(med_bus_type string) string {
	containsAny := func(keywords ...string) bool {
		for _, kw := range keywords {
			if strings.Contains(med_bus_type, kw) {
				return true
			}
		}
		return false
	}

	switch {
	case containsAny("民营", "个体户"):
		return "民营"
	case containsAny("公立", "国营"):
		return "公立"
	default:
		return "其它"
	}
}
|
|
|
|
|
|
//重置重复标记~
|
|
@@ -119,10 +209,10 @@ func resetRepeatHospital() {
|
|
|
it := sess.DB(class.Save_Mgo.DbName).C(save_coll).Find(&q).Sort("_id").Select(map[string]interface{}{
|
|
|
"name": 1,
|
|
|
}).Iter()
|
|
|
- total := 0
|
|
|
+ total, isok := 0, 0
|
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
- if total%10000 == 0 {
|
|
|
- log.Debug("cur index ", total)
|
|
|
+ if total%5000 == 0 {
|
|
|
+ log.Debug("cur index ", total, "~", isok)
|
|
|
}
|
|
|
name := qu.ObjToString(tmp["name"])
|
|
|
tmpid := class.BsonTOStringId(tmp["_id"])
|
|
@@ -134,6 +224,7 @@ func resetRepeatHospital() {
|
|
|
"repeat_id": "",
|
|
|
}
|
|
|
} else {
|
|
|
+ isok++
|
|
|
update = map[string]interface{}{
|
|
|
"repeat": 1,
|
|
|
"repeat_id": data_hospitals[name],
|
|
@@ -144,6 +235,47 @@ func resetRepeatHospital() {
|
|
|
})
|
|
|
tmp = make(map[string]interface{})
|
|
|
}
|
|
|
- log.Debug("重置~~完毕~~over~~")
|
|
|
+ log.Debug("重置~~完毕~~over~~", total, "~", isok)
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+//标准名称~~
|
|
|
+func standardname(s_name string) (bool, string, string) {
|
|
|
+ //清洗数据
|
|
|
+ is_clean := false
|
|
|
+ new_name := s_name
|
|
|
+ his_name := ""
|
|
|
|
|
|
+ if cleanReg_1.MatchString(new_name) {
|
|
|
+ new_name = cleanReg_1.ReplaceAllString(new_name, "")
|
|
|
+ is_clean = true
|
|
|
+ }
|
|
|
+ //清洗 XX科室
|
|
|
+ if suffixReg_1.MatchString(new_name) { //需要采用截取的方式
|
|
|
+ index_arr := suffixReg_2.FindAllStringIndex(new_name, -1)
|
|
|
+ last := index_arr[len(index_arr)-1]
|
|
|
+ last_index := last[len(last)-1]
|
|
|
+ new_name = new_name[:last_index]
|
|
|
+ is_clean = true
|
|
|
+ }
|
|
|
+ //清洗多余结尾~路标地址等
|
|
|
+ if suffixReg_3.MatchString(new_name) {
|
|
|
+ index_arr := suffixReg_3.FindAllStringIndex(new_name, -1)
|
|
|
+ last := index_arr[len(index_arr)-1]
|
|
|
+ strat_index := last[0]
|
|
|
+ last_index := last[len(last)-1]
|
|
|
+ suffix_name := new_name[strat_index:last_index]
|
|
|
+ if suffixReg_4.MatchString(suffix_name) {
|
|
|
+ his_name = suffixReg_4.ReplaceAllString(suffix_name, "${2}")
|
|
|
+ new_name = suffixReg_4.ReplaceAllString(new_name, "")
|
|
|
+ is_clean = true
|
|
|
+ }
|
|
|
+ if utf8.RuneCountInString(suffix_name) == 4 {
|
|
|
+ if !suffixReg_5.MatchString(suffix_name) {
|
|
|
+ new_name = strings.ReplaceAll(new_name, suffix_name, "")
|
|
|
+ is_clean = true
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return is_clean, new_name, his_name
|
|
|
}
|