|
@@ -0,0 +1,404 @@
|
|
|
|
+package extract
|
|
|
|
+
|
|
|
|
+import (
|
|
|
|
+ qu "qfw/util"
|
|
|
|
+)
|
|
|
|
+
|
|
|
|
// ConfirmUniqueRegionInfo makes the final decision on whether regions pins
// down exactly one province/city/district triple.
//
// regions is nested as province -> city -> district. When it holds exactly
// one province, that province is written to *area. If that province holds
// exactly one city, the city is written to *city, and if that city holds
// exactly one district, the district is written to *district. A level that is
// not unique leaves its output (and every deeper output) untouched.
//
// It returns true only when *area, *city and *district are all non-empty once
// the pass is finished. When regions does not contain exactly one province
// the outputs are not modified and false is returned.
//
// NOTE(review): values the caller already stored in *city / *district take
// part in the final check, so a stale value can make this return true even
// though regions itself is ambiguous at that level - confirm this is intended.
func ConfirmUniqueRegionInfo(regions map[string]map[string]map[string]string, area *string, city *string, district *string) bool {
	if len(regions) != 1 {
		return false
	}
	// Exactly one entry per level from here on, so each loop body runs once.
	for province, cities := range regions {
		*area = province
		if len(cities) != 1 {
			break
		}
		for cityName, districts := range cities {
			*city = cityName
			if len(districts) != 1 {
				break
			}
			for districtName := range districts {
				*district = districtName
			}
		}
	}
	return *area != "" && *city != "" && *district != ""
}
|
|
|
|
+
|
|
|
|
// CompleteRegionInfo fills in the parts of a region triple that follow
// directly from the province.
//
//   - For the four municipalities handled here (北京, 天津, 上海, 重庆) the
//     city is overwritten with the municipality's own city name.
//   - For 北京 the district value "北京朝阳" is additionally normalised to
//     "朝阳区".
//   - An empty province becomes "全国" and both city and district are cleared.
//
// Any other province leaves all three values untouched.
func CompleteRegionInfo(area *string, city *string, district *string) {
	switch *area {
	case "北京":
		*city = "北京市"
		// Special case carried over from the original comment: seen for
		// "北京朝阳中西医结合急诊抢救中心" (reference 5a84079740d2d9bbe88bad90,
		// presumably a record id).
		if *district == "北京朝阳" {
			*district = "朝阳区"
		}
	case "天津":
		*city = "天津市"
	case "上海":
		*city = "上海市"
	case "重庆":
		*city = "重庆市"
	case "":
		// No province at all: fall back to the nationwide marker.
		*area = "全国"
		*city = ""
		*district = ""
	}
}
|
|
|
|
+
|
|
|
|
+//根据词获取所有的地域
|
|
|
|
+func (e *ExtractTask) takeRegionsFromWords(text string) []map[string]string {
|
|
|
|
+ regions := []map[string]string{}
|
|
|
|
+ //全称匹配
|
|
|
|
+ for pos_full, trie_full := range e.Trie_Fulls {
|
|
|
|
+ if trie_full.Get(text) {
|
|
|
|
+ if pos_full == 0 {
|
|
|
|
+ if province := e.ProvinceMap[text]; province != "" {
|
|
|
|
+ regions = append(regions, map[string]string{"area": province, "city": "", "district": ""})
|
|
|
|
+ }
|
|
|
|
+ } else if pos_full == 1 {
|
|
|
|
+ if data := e.CityFullMap[text]; data != nil {
|
|
|
|
+ if data.P.Brief != "" && data.Name != "" {
|
|
|
|
+ regions = append(regions, map[string]string{"area": data.P.Brief, "city": data.Name, "district": ""})
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ } else if pos_full == 2 {
|
|
|
|
+ citys := e.DistrictCityMap[text]
|
|
|
|
+ for _, c := range citys {
|
|
|
|
+ if c.P.Brief != "" && c.Name != "" && text != "" {
|
|
|
|
+ regions = append(regions, map[string]string{"area": c.P.Brief, "city": c.Name, "district": text})
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ //简称匹配
|
|
|
|
+ for pos_sim, trie_sim := range e.Trie_Sims {
|
|
|
|
+ if trie_sim.Get(text) {
|
|
|
|
+ if pos_sim == 0 {
|
|
|
|
+ if text != "" {
|
|
|
|
+ regions = append(regions, map[string]string{"area": text, "city": "", "district": ""})
|
|
|
|
+ }
|
|
|
|
+ } else if pos_sim == 1 {
|
|
|
|
+ if csMap := e.CityBriefMap[text]; csMap != nil {
|
|
|
|
+ if csMap.P.Brief != "" && csMap.Name != "" {
|
|
|
|
+ regions = append(regions, map[string]string{"area": csMap.P.Brief, "city": csMap.Name, "district": ""})
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ } else if pos_sim == 2 {
|
|
|
|
+ citysArr := e.DistrictSimAndAll[text]
|
|
|
|
+ for _, full_citys := range citysArr {
|
|
|
|
+ for district, c := range full_citys {
|
|
|
|
+ if c == nil || c.P == nil || c.Name == "" {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if c.P.Brief != "" && c.Name != "" && district != "" {
|
|
|
|
+ regions = append(regions, map[string]string{"area": c.P.Brief, "city": c.Name, "district": district})
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return regions
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+//文本取地域 from 1~jsondata文本 2~其他文本
|
|
|
|
+func (e *ExtractTask) GetRegionFromText(text string, regions *map[string]map[string]map[string]string, from int) []map[string]interface{} {
|
|
|
|
+ regionValues := []map[string]interface{}{}
|
|
|
|
+ if text == "" {
|
|
|
|
+ return regionValues
|
|
|
|
+ }
|
|
|
|
+ wordsArr := []string{}
|
|
|
|
+ if from == 1 {
|
|
|
|
+ wordsArr = e.Seg_PCD.Cut(text, true)
|
|
|
|
+ } else if from == 2 {
|
|
|
|
+ wordsArr = e.Seg_SV.Cut(text, true)
|
|
|
|
+ }
|
|
|
|
+ for _, word := range wordsArr {
|
|
|
|
+ regionArr := e.takeRegionsFromWords(word)
|
|
|
|
+ for _, v := range regionArr {
|
|
|
|
+ area := qu.ObjToString(v["area"])
|
|
|
|
+ city := qu.ObjToString(v["city"])
|
|
|
|
+ district := qu.ObjToString(v["district"])
|
|
|
|
+ UpdateRegionsInfo(area, city, district, regions)
|
|
|
|
+ regionValues = append(regionValues, map[string]interface{}{"area": area, "city": city, "district": district})
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return regionValues
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
// UpdateRegionsInfo merges one (area, city, district) triple into the nested
// province -> city -> district tree that regions points to.
//
//   - The province entry is created when missing, even if city is empty.
//   - A non-empty city is added under the province; an empty city adds nothing
//     below the province, so a district without a city is ignored.
//   - A non-empty district is stored under the city, keyed and valued by its
//     own name.
//
// Existing entries are kept, so calling this repeatedly accumulates regions
// without duplicates.
//
// If *regions is a nil map it is allocated first (writing to a nil map would
// panic). A nil regions pointer makes the call a no-op.
func UpdateRegionsInfo(area, city, district string, regions *map[string]map[string]map[string]string) {
	if regions == nil {
		return
	}
	if *regions == nil {
		*regions = map[string]map[string]map[string]string{}
	}

	cities := (*regions)[area]
	if cities == nil {
		cities = map[string]map[string]string{}
		(*regions)[area] = cities
	}
	if city == "" {
		return
	}

	districts := cities[city]
	if districts == nil {
		districts = map[string]string{}
		cities[city] = districts
	}
	if district != "" {
		districts[district] = district
	}
}
|
|
|
|
+
|
|
|
|
// ReasonableGroupRegionInfo sanity-checks a merged province -> city ->
// district tree and strips the levels that are too ambiguous to trust.
//
// Rules, applied in this order:
//
//  1. No province, or more than two provinces: the data is considered invalid
//     and a new empty tree is returned.
//  2. More than three cities in total: each province keeps its cities only if
//     it has exactly one city, otherwise it keeps none. If the surviving
//     cities still carry more than three districts in total, rule 3's pruning
//     is applied to that result as well.
//  3. Otherwise, more than three districts in total: each city keeps its
//     districts only if it has exactly one, otherwise it keeps none.
//  4. Otherwise datas itself is returned unchanged.
//
// Retained city and district maps are shared with datas, not copied.
func ReasonableGroupRegionInfo(datas map[string]map[string]map[string]string) map[string]map[string]map[string]string {
	if n := len(datas); n == 0 || n > 2 { // province count out of range: invalid
		return map[string]map[string]map[string]string{}
	}

	cityTotal, districtTotal := 0, 0
	for _, cities := range datas {
		cityTotal += len(cities)
		for _, districts := range cities {
			districtTotal += len(districts)
		}
	}

	// pruneDistricts rebuilds src so that every city keeps its district map
	// only when it holds exactly one district; other cities get an empty map.
	pruneDistricts := func(src map[string]map[string]map[string]string) map[string]map[string]map[string]string {
		out := map[string]map[string]map[string]string{}
		for province, cities := range src {
			kept := map[string]map[string]string{}
			for city, districts := range cities {
				if len(districts) == 1 {
					kept[city] = districts
				} else {
					kept[city] = map[string]string{}
				}
			}
			out[province] = kept
		}
		return out
	}

	switch {
	case cityTotal > 3:
		pruned := map[string]map[string]map[string]string{}
		remaining := 0 // districts left after dropping multi-city provinces
		for province, cities := range datas {
			if len(cities) != 1 {
				pruned[province] = map[string]map[string]string{}
				continue
			}
			pruned[province] = cities
			for _, districts := range cities {
				remaining += len(districts)
			}
		}
		if remaining > 3 {
			return pruneDistricts(pruned)
		}
		return pruned
	case districtTotal > 3:
		return pruneDistricts(datas)
	}
	return datas
}
|
|
|
|
+
|
|
|
|
+//两组比对~找寻补充,排除数据
|
|
|
|
+func AnalysisIsUniqueInfo(regions map[string]map[string]map[string]string, all_regions *map[string]map[string]map[string]string) {
|
|
|
|
+ if len(regions) == 0 {
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+ if len(*all_regions) == 0 {
|
|
|
|
+ *all_regions = regions
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+ regionsArr := splitRegionsInfos(regions) //目标数据
|
|
|
|
+ all_regionsArr := splitRegionsInfos(*all_regions) //源数据
|
|
|
|
+ new_all_regionsArr := []map[string]string{} //新数据
|
|
|
|
+ for _, info := range regionsArr {
|
|
|
|
+ area := qu.ObjToString(info["area"])
|
|
|
|
+ if (*all_regions)[area] == nil {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ unmatchInfo1 := ScreenOutReasonableRegionInfo(info, &all_regionsArr, &new_all_regionsArr)
|
|
|
|
+ if unmatchInfo1 != nil { //降级匹配~最多二级
|
|
|
|
+ unmatchInfo2 := ScreenOutReasonableRegionInfo(unmatchInfo1, &all_regionsArr, &new_all_regionsArr)
|
|
|
|
+ if unmatchInfo2 != nil { //降级匹配~最多一级
|
|
|
|
+ ScreenOutReasonableRegionInfo(unmatchInfo2, &all_regionsArr, &new_all_regionsArr)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ //根据最新有效地域组~重新构建所有信息
|
|
|
|
+ reset_regions_infos := map[string]map[string]map[string]string{}
|
|
|
|
+ if len(new_all_regionsArr) > 0 {
|
|
|
|
+ for _, v := range new_all_regionsArr {
|
|
|
|
+ area := qu.ObjToString(v["area"])
|
|
|
|
+ city := qu.ObjToString(v["city"])
|
|
|
|
+ district := qu.ObjToString(v["district"])
|
|
|
|
+ UpdateRegionsInfo(area, city, district, &reset_regions_infos)
|
|
|
|
+ }
|
|
|
|
+ *all_regions = reset_regions_infos
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+//选取规则方法
|
|
|
|
+func ScreenOutReasonableRegionInfo(info map[string]string, regions_infosArr *[]map[string]string, new_regions_infosArr *[]map[string]string) map[string]string {
|
|
|
|
+ area := qu.ObjToString(info["area"])
|
|
|
|
+ city := qu.ObjToString(info["city"])
|
|
|
|
+ district := qu.ObjToString(info["district"])
|
|
|
|
+ is_Exist := false
|
|
|
|
+ for _, s := range *regions_infosArr {
|
|
|
|
+ s_area := qu.ObjToString(s["area"])
|
|
|
|
+ s_city := qu.ObjToString(s["city"])
|
|
|
|
+ s_district := qu.ObjToString(s["district"])
|
|
|
|
+ if s_area == area && s_city == city && s_district == district {
|
|
|
|
+ is_Exist = true
|
|
|
|
+ *new_regions_infosArr = append(*new_regions_infosArr, info)
|
|
|
|
+ } else {
|
|
|
|
+ //判断是否为补充原则
|
|
|
|
+ if area != "" && city != "" && district != "" { //3级补2级 3级补2级
|
|
|
|
+ if s_area == area && s_city == city && s_district == "" {
|
|
|
|
+ is_Exist = true
|
|
|
|
+ *new_regions_infosArr = append(*new_regions_infosArr, info)
|
|
|
|
+ } else if s_area == area && s_city == "" && s_district == "" {
|
|
|
|
+ is_Exist = true
|
|
|
|
+ *new_regions_infosArr = append(*new_regions_infosArr, info)
|
|
|
|
+ }
|
|
|
|
+ } else if area != "" && city != "" && district == "" { //2级补1级
|
|
|
|
+ if s_area == area && s_city == "" {
|
|
|
|
+ is_Exist = true
|
|
|
|
+ *new_regions_infosArr = append(*new_regions_infosArr, info)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ //若没有补充~针对二三级 再次进行去掉末位一级,再次进行一轮比对
|
|
|
|
+ if !is_Exist {
|
|
|
|
+ if area != "" && city != "" && district != "" {
|
|
|
|
+ return map[string]string{"area": area, "city": city, "district": ""}
|
|
|
|
+ }
|
|
|
|
+ if area != "" && city != "" && district == "" {
|
|
|
|
+ return map[string]string{"area": area, "city": "", "district": ""}
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return nil
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
// splitRegionsInfos flattens a province -> city -> district tree into a list
// of entries, which is easier to compare and merge.
//
// Every entry is a map with the keys "area", "city" and "district". One
// entry is produced per leaf of the tree: a province without cities yields
// an entry with empty city and district, a city without districts yields an
// entry with an empty district, and every district yields a full entry.
//
// The result is never nil. Its order follows Go's map iteration order and is
// therefore not stable between calls.
func splitRegionsInfos(infos map[string]map[string]map[string]string) []map[string]string {
	flat := []map[string]string{}
	for province, cities := range infos {
		if len(cities) == 0 {
			flat = append(flat, map[string]string{"area": province, "city": "", "district": ""})
			continue
		}
		for city, districts := range cities {
			if len(districts) == 0 {
				flat = append(flat, map[string]string{"area": province, "city": city, "district": ""})
				continue
			}
			for district := range districts {
				flat = append(flat, map[string]string{"area": province, "city": city, "district": district})
			}
		}
	}
	return flat
}
|
|
|
|
+
|
|
|
|
+//日志流程记录~组级别
|
|
|
|
+func LogProcessRecordingForGroupInfo(key string, valueArr []string, fieldInfos map[string]interface{}, groupInfos map[string]map[string]map[string]string, finallyInfos map[string]map[string]map[string]string, logRecordInfo *[]map[string]interface{}) {
|
|
|
|
+ groupArr := splitRegionsInfos(groupInfos)
|
|
|
|
+ finalluArr := splitRegionsInfos(finallyInfos)
|
|
|
|
+ data := map[string]interface{}{
|
|
|
|
+ key + "_value": valueArr,
|
|
|
|
+ key + "_group": groupArr,
|
|
|
|
+ "finally_region": finalluArr,
|
|
|
|
+ }
|
|
|
|
+ for k, v := range fieldInfos {
|
|
|
|
+ data[k] = v
|
|
|
|
+ }
|
|
|
|
+ *logRecordInfo = append(*logRecordInfo, data)
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+//日志流程记录~初步
|
|
|
|
+func LogProcessRecordingForTentative(key string, valueArr interface{}, finallyInfos map[string]map[string]map[string]string, logRecordInfo *[]map[string]interface{}) {
|
|
|
|
+ finallyArr := splitRegionsInfos(finallyInfos)
|
|
|
|
+ data := map[string]interface{}{
|
|
|
|
+ key + "_value": valueArr,
|
|
|
|
+ "finally_region": finallyArr,
|
|
|
|
+ }
|
|
|
|
+ *logRecordInfo = append(*logRecordInfo, data)
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+//同组合并的地域数据
|
|
|
|
+//func MergeGroupRegionInfo(datas_1, datas_2 map[string]map[string]map[string]string) map[string]map[string]map[string]string {
|
|
|
|
+// regions := map[string]map[string]map[string]string{}
|
|
|
|
+// if len(datas_1) > 0 && len(datas_2) == 0 {
|
|
|
|
+// return datas_1
|
|
|
|
+// }
|
|
|
|
+// if len(datas_2) > 0 && len(datas_1) == 0 {
|
|
|
|
+// return datas_2
|
|
|
|
+// }
|
|
|
|
+// for k, v := range datas_1 {
|
|
|
|
+// area, city, district := "", "", ""
|
|
|
|
+// area = k
|
|
|
|
+// if len(v) > 0 {
|
|
|
|
+// for k1, v1 := range v {
|
|
|
|
+// city = k1
|
|
|
|
+// if len(v1) > 0 {
|
|
|
|
+// for k2, _ := range v1 {
|
|
|
|
+// district = k2
|
|
|
|
+//					UpdateRegionsInfo(area, city, district, &regions)
|
|
|
|
+// }
|
|
|
|
+// } else {
|
|
|
|
+//				UpdateRegionsInfo(area, city, district, &regions)
|
|
|
|
+// }
|
|
|
|
+// }
|
|
|
|
+// } else {
|
|
|
|
+//		UpdateRegionsInfo(area, city, district, &regions)
|
|
|
|
+// }
|
|
|
|
+// }
|
|
|
|
+//
|
|
|
|
+// for k, v := range datas_2 {
|
|
|
|
+// area, city, district := "", "", ""
|
|
|
|
+// area = k
|
|
|
|
+// if len(v) > 0 {
|
|
|
|
+// for k1, v1 := range v {
|
|
|
|
+// city = k1
|
|
|
|
+// if len(v1) > 0 {
|
|
|
|
+// for k2, _ := range v1 {
|
|
|
|
+// district = k2
|
|
|
|
+//					UpdateRegionsInfo(area, city, district, &regions)
|
|
|
|
+// }
|
|
|
|
+// } else {
|
|
|
|
+//				UpdateRegionsInfo(area, city, district, &regions)
|
|
|
|
+// }
|
|
|
|
+// }
|
|
|
|
+// } else {
|
|
|
|
+//		UpdateRegionsInfo(area, city, district, &regions)
|
|
|
|
+// }
|
|
|
|
+// }
|
|
|
|
+// return regions
|
|
|
|
+//}
|