package extract import ( qu "qfw/util" ) //最终确认确认指定地域 func ConfirmUniqueRegionInfo(regions map[string]map[string]map[string]string, area *string, city *string, district *string) bool { if len(regions) > 1 || len(regions) == 0 { return false } for k, v := range regions { *area = k if len(v) == 1 { for k1, v1 := range v { *city = k1 if len(v1) == 1 { for k2, _ := range v1 { *district = k2 } } } } } if *area != "" && *city != "" && *district != "" { return true } return false } //完整信息 func CompleteRegionInfo(area *string, city *string, district *string) { if *area == "北京" { *city = "北京市" if *district == "北京朝阳" { //特殊情况(北京朝阳中西医结合急诊抢救中心:5a84079740d2d9bbe88bad90) *district = "朝阳区" } } else if *area == "天津" { *city = "天津市" } else if *area == "上海" { *city = "上海市" } else if *area == "重庆" { *city = "重庆市" } if *area == "" { *area = "全国" *city = "" *district = "" } } //根据词获取所有的地域 func (e *ExtractTask) takeRegionsFromWords(text string) []map[string]string { regions := []map[string]string{} //全称匹配 for pos_full, trie_full := range e.Trie_Fulls { if trie_full.Get(text) { if pos_full == 0 { if province := e.ProvinceMap[text]; province != "" { regions = append(regions, map[string]string{"area": province, "city": "", "district": ""}) } } else if pos_full == 1 { if data := e.CityFullMap[text]; data != nil { if data.P.Brief != "" && data.Name != "" { regions = append(regions, map[string]string{"area": data.P.Brief, "city": data.Name, "district": ""}) } } } else if pos_full == 2 { citys := e.DistrictCityMap[text] for _, c := range citys { if c.P.Brief != "" && c.Name != "" && text != "" { regions = append(regions, map[string]string{"area": c.P.Brief, "city": c.Name, "district": text}) } } } } } //简称匹配 for pos_sim, trie_sim := range e.Trie_Sims { if trie_sim.Get(text) { if pos_sim == 0 { if text != "" { regions = append(regions, map[string]string{"area": text, "city": "", "district": ""}) } } else if pos_sim == 1 { if csMap := e.CityBriefMap[text]; csMap != nil { if csMap.P.Brief != "" && csMap.Name != "" { regions = append(regions, map[string]string{"area": csMap.P.Brief, "city": csMap.Name, "district": ""}) } } } else if pos_sim == 2 { citysArr := e.DistrictSimAndAll[text] for _, full_citys := range citysArr { for district, c := range full_citys { if c == nil || c.P == nil || c.Name == "" { continue } if c.P.Brief != "" && c.Name != "" && district != "" { regions = append(regions, map[string]string{"area": c.P.Brief, "city": c.Name, "district": district}) } } } } } } return regions } //文本取地域 from 1~jsondata文本 2~其他文本 func (e *ExtractTask) GetRegionFromText(text string, regions *map[string]map[string]map[string]string, from int) []map[string]interface{} { regionValues := []map[string]interface{}{} if text == "" { return regionValues } wordsArr := []string{} if from == 1 { wordsArr = e.Seg_PCD.Cut(text, true) } else if from == 2 { wordsArr = e.Seg_SV.Cut(text, true) } for _, word := range wordsArr { regionArr := e.takeRegionsFromWords(word) for _, v := range regionArr { area := qu.ObjToString(v["area"]) city := qu.ObjToString(v["city"]) district := qu.ObjToString(v["district"]) UpdateRegionsInfo(area, city, district, regions) regionValues = append(regionValues, map[string]interface{}{"area": area, "city": city, "district": district}) } } return regionValues } //更新方法 func UpdateRegionsInfo(area, city, district string, regions *map[string]map[string]map[string]string) { if (*regions)[area] == nil { city_info := map[string]map[string]string{} district_info := map[string]string{} if city != "" { if district != "" { district_info[district] = district } city_info[city] = district_info } (*regions)[area] = city_info //新增 } else { city_info := (*regions)[area] if city != "" { district_info := map[string]string{} if city_info[city] != nil { district_info = city_info[city] } if district != "" { district_info[district] = district } city_info[city] = district_info (*regions)[area] = city_info } } } //同组合并后合理性校验 func ReasonableGroupRegionInfo(datas map[string]map[string]map[string]string) map[string]map[string]map[string]string { if len(datas) > 2 || len(datas) == 0 { //省份超限,无效 return map[string]map[string]map[string]string{} } uncity, undistrict := 0, 0 for _, v := range datas { uncity += len(v) for _, v1 := range v { undistrict += len(v1) } } if uncity > 3 { regions_1 := map[string]map[string]map[string]string{} for k, v := range datas { city_info := map[string]map[string]string{} if len(v) == 1 { city_info = v } regions_1[k] = city_info } //计算当前 uncity_district := 0 for _, v := range regions_1 { for _, v1 := range v { uncity_district += len(v1) } } if uncity_district > 3 { regions_2 := map[string]map[string]map[string]string{} for k, v := range regions_1 { city_info := map[string]map[string]string{} for k1, v1 := range v { district_info := map[string]string{} if len(v1) == 1 { district_info = v1 } city_info[k1] = district_info } regions_2[k] = city_info } return regions_2 } return regions_1 } if undistrict > 3 { new_regions := map[string]map[string]map[string]string{} for k, v := range datas { city_info := map[string]map[string]string{} for k1, v1 := range v { district_info := map[string]string{} if len(v1) == 1 { district_info = v1 } city_info[k1] = district_info } new_regions[k] = city_info } return new_regions } return datas } //两组比对~找寻补充,排除数据 func AnalysisIsUniqueInfo(regions map[string]map[string]map[string]string, all_regions *map[string]map[string]map[string]string) { if len(regions) == 0 { return } if len(*all_regions) == 0 { *all_regions = regions return } regionsArr := splitRegionsInfos(regions) //目标数据 all_regionsArr := splitRegionsInfos(*all_regions) //源数据 new_all_regionsArr := []map[string]string{} //新数据 for _, info := range regionsArr { area := qu.ObjToString(info["area"]) if (*all_regions)[area] == nil { continue } unmatchInfo1 := ScreenOutReasonableRegionInfo(info, &all_regionsArr, &new_all_regionsArr) if unmatchInfo1 != nil { //降级匹配~最多二级 unmatchInfo2 := ScreenOutReasonableRegionInfo(unmatchInfo1, &all_regionsArr, &new_all_regionsArr) if unmatchInfo2 != nil { //降级匹配~最多一级 ScreenOutReasonableRegionInfo(unmatchInfo2, &all_regionsArr, &new_all_regionsArr) } } } //根据最新有效地域组~重新构建所有信息 reset_regions_infos := map[string]map[string]map[string]string{} if len(new_all_regionsArr) > 0 { for _, v := range new_all_regionsArr { area := qu.ObjToString(v["area"]) city := qu.ObjToString(v["city"]) district := qu.ObjToString(v["district"]) UpdateRegionsInfo(area, city, district, &reset_regions_infos) } *all_regions = reset_regions_infos } } //选取规则方法 func ScreenOutReasonableRegionInfo(info map[string]string, regions_infosArr *[]map[string]string, new_regions_infosArr *[]map[string]string) map[string]string { area := qu.ObjToString(info["area"]) city := qu.ObjToString(info["city"]) district := qu.ObjToString(info["district"]) is_Exist := false for _, s := range *regions_infosArr { s_area := qu.ObjToString(s["area"]) s_city := qu.ObjToString(s["city"]) s_district := qu.ObjToString(s["district"]) if s_area == area && s_city == city && s_district == district { is_Exist = true *new_regions_infosArr = append(*new_regions_infosArr, info) } else { //判断是否为补充原则 if area != "" && city != "" && district != "" { //3级补2级 3级补2级 if s_area == area && s_city == city && s_district == "" { is_Exist = true *new_regions_infosArr = append(*new_regions_infosArr, info) } else if s_area == area && s_city == "" && s_district == "" { is_Exist = true *new_regions_infosArr = append(*new_regions_infosArr, info) } } else if area != "" && city != "" && district == "" { //2级补1级 if s_area == area && s_city == "" { is_Exist = true *new_regions_infosArr = append(*new_regions_infosArr, info) } } } } //若没有补充~针对二三级 再次进行去掉末位一级,再次进行一轮比对 if !is_Exist { if area != "" && city != "" && district != "" { return map[string]string{"area": area, "city": city, "district": ""} } if area != "" && city != "" && district == "" { return map[string]string{"area": area, "city": "", "district": ""} } } return nil } //拆分地域数据~目的更好的合并选取 func splitRegionsInfos(infos map[string]map[string]map[string]string) []map[string]string { infosArr := []map[string]string{} for k, v := range infos { if len(v) > 0 { for k1, v1 := range v { if len(v1) > 0 { for k2, _ := range v1 { infosArr = append(infosArr, map[string]string{"area": k, "city": k1, "district": k2}) } } else { infosArr = append(infosArr, map[string]string{"area": k, "city": k1, "district": ""}) } } } else { infosArr = append(infosArr, map[string]string{"area": k, "city": "", "district": ""}) } } return infosArr } //日志流程记录~组级别 func LogProcessRecordingForGroupInfo(key string, valueArr []string, fieldInfos map[string]interface{}, groupInfos map[string]map[string]map[string]string, finallyInfos map[string]map[string]map[string]string, logRecordInfo *[]map[string]interface{}) { groupArr := splitRegionsInfos(groupInfos) finalluArr := splitRegionsInfos(finallyInfos) data := map[string]interface{}{ key + "_value": valueArr, key + "_group": groupArr, "finally_region": finalluArr, } for k, v := range fieldInfos { data[k] = v } *logRecordInfo = append(*logRecordInfo, data) } //日志流程记录~初步 func LogProcessRecordingForTentative(key string, valueArr interface{}, finallyInfos map[string]map[string]map[string]string, logRecordInfo *[]map[string]interface{}) { finallyArr := splitRegionsInfos(finallyInfos) data := map[string]interface{}{ key + "_value": valueArr, "finally_region": finallyArr, } *logRecordInfo = append(*logRecordInfo, data) } //同组合并的地域数据 //func MergeGroupRegionInfo(datas_1, datas_2 map[string]map[string]map[string]string) map[string]map[string]map[string]string { // regions := map[string]map[string]map[string]string{} // if len(datas_1) > 0 && len(datas_2) == 0 { // return datas_1 // } // if len(datas_2) > 0 && len(datas_1) == 0 { // return datas_2 // } // for k, v := range datas_1 { // area, city, district := "", "", "" // area = k // if len(v) > 0 { // for k1, v1 := range v { // city = k1 // if len(v1) > 0 { // for k2, _ := range v1 { // district = k2 // UpdateRegionsInfo(area, city, district, ®ions) // } // } else { // UpdateRegionsInfo(area, city, district, ®ions) // } // } // } else { // UpdateRegionsInfo(area, city, district, ®ions) // } // } // // for k, v := range datas_2 { // area, city, district := "", "", "" // area = k // if len(v) > 0 { // for k1, v1 := range v { // city = k1 // if len(v1) > 0 { // for k2, _ := range v1 { // district = k2 // UpdateRegionsInfo(area, city, district, ®ions) // } // } else { // UpdateRegionsInfo(area, city, district, ®ions) // } // } // } else { // UpdateRegionsInfo(area, city, district, ®ions) // } // } // return regions //}