123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404 |
- package extract
- import (
- qu "qfw/util"
- )
// ConfirmUniqueRegionInfo tries to pin down a single province/city/district
// triple from regions (province -> city -> district -> district).
//
// It only inspects regions when it holds exactly one province. In that case
// *area is overwritten with the province; *city is overwritten only if that
// province has exactly one city, and *district only if that city has exactly
// one district. Values already stored behind the pointers are left untouched
// for any level that is ambiguous.
//
// It reports true when, after this, all three of *area, *city and *district
// are non-empty.
func ConfirmUniqueRegionInfo(regions map[string]map[string]map[string]string, area *string, city *string, district *string) bool {
	if len(regions) != 1 {
		return false
	}
	for province, cities := range regions {
		*area = province
		if len(cities) != 1 {
			continue
		}
		for cityName, districts := range cities {
			*city = cityName
			if len(districts) != 1 {
				continue
			}
			for districtName := range districts {
				*district = districtName
			}
		}
	}
	return *area != "" && *city != "" && *district != ""
}
// CompleteRegionInfo fills in or normalises the region triple in place.
//
//   - For the four areas 北京, 天津, 上海 and 重庆 the city is forced to the
//     matching "…市" name.
//   - For 北京 a district of "北京朝阳" is rewritten to "朝阳区" (special case
//     seen on record 5a84079740d2d9bbe88bad90).
//   - When the area is empty it becomes "全国" and city and district are
//     cleared.
//
// Any other area leaves all three values unchanged.
func CompleteRegionInfo(area *string, city *string, district *string) {
	switch *area {
	case "北京":
		*city = "北京市"
		if *district == "北京朝阳" {
			*district = "朝阳区"
		}
	case "天津":
		*city = "天津市"
	case "上海":
		*city = "上海市"
	case "重庆":
		*city = "重庆市"
	case "":
		*area = "全国"
		*city = ""
		*district = ""
	}
}
- //根据词获取所有的地域
- func (e *ExtractTask) takeRegionsFromWords(text string) []map[string]string {
- regions := []map[string]string{}
- //全称匹配
- for pos_full, trie_full := range e.Trie_Fulls {
- if trie_full.Get(text) {
- if pos_full == 0 {
- if province := e.ProvinceMap[text]; province != "" {
- regions = append(regions, map[string]string{"area": province, "city": "", "district": ""})
- }
- } else if pos_full == 1 {
- if data := e.CityFullMap[text]; data != nil {
- if data.P.Brief != "" && data.Name != "" {
- regions = append(regions, map[string]string{"area": data.P.Brief, "city": data.Name, "district": ""})
- }
- }
- } else if pos_full == 2 {
- citys := e.DistrictCityMap[text]
- for _, c := range citys {
- if c.P.Brief != "" && c.Name != "" && text != "" {
- regions = append(regions, map[string]string{"area": c.P.Brief, "city": c.Name, "district": text})
- }
- }
- }
- }
- }
- //简称匹配
- for pos_sim, trie_sim := range e.Trie_Sims {
- if trie_sim.Get(text) {
- if pos_sim == 0 {
- if text != "" {
- regions = append(regions, map[string]string{"area": text, "city": "", "district": ""})
- }
- } else if pos_sim == 1 {
- if csMap := e.CityBriefMap[text]; csMap != nil {
- if csMap.P.Brief != "" && csMap.Name != "" {
- regions = append(regions, map[string]string{"area": csMap.P.Brief, "city": csMap.Name, "district": ""})
- }
- }
- } else if pos_sim == 2 {
- citysArr := e.DistrictSimAndAll[text]
- for _, full_citys := range citysArr {
- for district, c := range full_citys {
- if c == nil || c.P == nil || c.Name == "" {
- continue
- }
- if c.P.Brief != "" && c.Name != "" && district != "" {
- regions = append(regions, map[string]string{"area": c.P.Brief, "city": c.Name, "district": district})
- }
- }
- }
- }
- }
- }
- return regions
- }
- //文本取地域 from 1~jsondata文本 2~其他文本
- func (e *ExtractTask) GetRegionFromText(text string, regions *map[string]map[string]map[string]string, from int) []map[string]interface{} {
- regionValues := []map[string]interface{}{}
- if text == "" {
- return regionValues
- }
- wordsArr := []string{}
- if from == 1 {
- wordsArr = e.Seg_PCD.Cut(text, true)
- } else if from == 2 {
- wordsArr = e.Seg_SV.Cut(text, true)
- }
- for _, word := range wordsArr {
- regionArr := e.takeRegionsFromWords(word)
- for _, v := range regionArr {
- area := qu.ObjToString(v["area"])
- city := qu.ObjToString(v["city"])
- district := qu.ObjToString(v["district"])
- UpdateRegionsInfo(area, city, district, regions)
- regionValues = append(regionValues, map[string]interface{}{"area": area, "city": city, "district": district})
- }
- }
- return regionValues
- }
// UpdateRegionsInfo merges one (area, city, district) triple into the nested
// map *regions, laid out as area -> city -> district -> district.
//
// The area entry is always created if missing, even when city is empty. A
// city entry is created only when city is non-empty, and a district is
// recorded only when both city and district are non-empty. Existing entries
// are extended, never replaced.
//
// A nil regions pointer is ignored. If *regions is a nil map it is allocated
// before use, so callers may pass the address of a zero-value map.
func UpdateRegionsInfo(area, city, district string, regions *map[string]map[string]map[string]string) {
	if regions == nil {
		return
	}
	if *regions == nil {
		// Writing to a nil map panics; allocate lazily instead.
		*regions = map[string]map[string]map[string]string{}
	}

	cities := (*regions)[area]
	if cities == nil {
		cities = map[string]map[string]string{}
		(*regions)[area] = cities
	}
	if city == "" {
		// Without a city there is nothing below the area level to record.
		return
	}

	districts := cities[city]
	if districts == nil {
		districts = map[string]string{}
		cities[city] = districts
	}
	if district != "" {
		districts[district] = district
	}
}
// ReasonableGroupRegionInfo sanity-checks a merged group of regions
// (province -> city -> district -> district) and prunes levels that are too
// ambiguous to be trusted.
//
// Rules, applied in order:
//   - No province, or more than two provinces: the whole group is invalid and
//     an empty map is returned.
//   - More than three cities in total: every province keeps its cities only if
//     it has exactly one; otherwise its city map is emptied. If the surviving
//     cities still carry more than three districts in total, each city keeps
//     its districts only if it has exactly one.
//   - Otherwise, more than three districts in total: each city keeps its
//     districts only if it has exactly one.
//   - Otherwise datas is returned unchanged.
//
// Inner maps that are kept are shared with the input, not copied.
func ReasonableGroupRegionInfo(datas map[string]map[string]map[string]string) map[string]map[string]map[string]string {
	if n := len(datas); n == 0 || n > 2 {
		return map[string]map[string]map[string]string{}
	}

	cityTotal, districtTotal := 0, 0
	for _, cities := range datas {
		cityTotal += len(cities)
		for _, districts := range cities {
			districtTotal += len(districts)
		}
	}

	// keepSoleDistricts rebuilds src so that a city retains its district map
	// only when that map has exactly one entry; all other cities get an empty
	// district map.
	keepSoleDistricts := func(src map[string]map[string]map[string]string) map[string]map[string]map[string]string {
		out := map[string]map[string]map[string]string{}
		for province, cities := range src {
			kept := map[string]map[string]string{}
			for cityName, districts := range cities {
				if len(districts) == 1 {
					kept[cityName] = districts
				} else {
					kept[cityName] = map[string]string{}
				}
			}
			out[province] = kept
		}
		return out
	}

	switch {
	case cityTotal > 3:
		// Too many cities: trust a province's city only when it is the sole one.
		byProvince := map[string]map[string]map[string]string{}
		for province, cities := range datas {
			if len(cities) == 1 {
				byProvince[province] = cities
			} else {
				byProvince[province] = map[string]map[string]string{}
			}
		}
		// Re-count districts among the cities that survived.
		remaining := 0
		for _, cities := range byProvince {
			for _, districts := range cities {
				remaining += len(districts)
			}
		}
		if remaining > 3 {
			return keepSoleDistricts(byProvince)
		}
		return byProvince
	case districtTotal > 3:
		return keepSoleDistricts(datas)
	}
	return datas
}
- //两组比对~找寻补充,排除数据
- func AnalysisIsUniqueInfo(regions map[string]map[string]map[string]string, all_regions *map[string]map[string]map[string]string) {
- if len(regions) == 0 {
- return
- }
- if len(*all_regions) == 0 {
- *all_regions = regions
- return
- }
- regionsArr := splitRegionsInfos(regions) //目标数据
- all_regionsArr := splitRegionsInfos(*all_regions) //源数据
- new_all_regionsArr := []map[string]string{} //新数据
- for _, info := range regionsArr {
- area := qu.ObjToString(info["area"])
- if (*all_regions)[area] == nil {
- continue
- }
- unmatchInfo1 := ScreenOutReasonableRegionInfo(info, &all_regionsArr, &new_all_regionsArr)
- if unmatchInfo1 != nil { //降级匹配~最多二级
- unmatchInfo2 := ScreenOutReasonableRegionInfo(unmatchInfo1, &all_regionsArr, &new_all_regionsArr)
- if unmatchInfo2 != nil { //降级匹配~最多一级
- ScreenOutReasonableRegionInfo(unmatchInfo2, &all_regionsArr, &new_all_regionsArr)
- }
- }
- }
- //根据最新有效地域组~重新构建所有信息
- reset_regions_infos := map[string]map[string]map[string]string{}
- if len(new_all_regionsArr) > 0 {
- for _, v := range new_all_regionsArr {
- area := qu.ObjToString(v["area"])
- city := qu.ObjToString(v["city"])
- district := qu.ObjToString(v["district"])
- UpdateRegionsInfo(area, city, district, &reset_regions_infos)
- }
- *all_regions = reset_regions_infos
- }
- }
- //选取规则方法
- func ScreenOutReasonableRegionInfo(info map[string]string, regions_infosArr *[]map[string]string, new_regions_infosArr *[]map[string]string) map[string]string {
- area := qu.ObjToString(info["area"])
- city := qu.ObjToString(info["city"])
- district := qu.ObjToString(info["district"])
- is_Exist := false
- for _, s := range *regions_infosArr {
- s_area := qu.ObjToString(s["area"])
- s_city := qu.ObjToString(s["city"])
- s_district := qu.ObjToString(s["district"])
- if s_area == area && s_city == city && s_district == district {
- is_Exist = true
- *new_regions_infosArr = append(*new_regions_infosArr, info)
- } else {
- //判断是否为补充原则
- if area != "" && city != "" && district != "" { //3级补2级 3级补2级
- if s_area == area && s_city == city && s_district == "" {
- is_Exist = true
- *new_regions_infosArr = append(*new_regions_infosArr, info)
- } else if s_area == area && s_city == "" && s_district == "" {
- is_Exist = true
- *new_regions_infosArr = append(*new_regions_infosArr, info)
- }
- } else if area != "" && city != "" && district == "" { //2级补1级
- if s_area == area && s_city == "" {
- is_Exist = true
- *new_regions_infosArr = append(*new_regions_infosArr, info)
- }
- }
- }
- }
- //若没有补充~针对二三级 再次进行去掉末位一级,再次进行一轮比对
- if !is_Exist {
- if area != "" && city != "" && district != "" {
- return map[string]string{"area": area, "city": city, "district": ""}
- }
- if area != "" && city != "" && district == "" {
- return map[string]string{"area": area, "city": "", "district": ""}
- }
- }
- return nil
- }
// splitRegionsInfos flattens the nested province -> city -> district map into
// a list of {"area", "city", "district"} records so groups can be compared
// and merged entry by entry.
//
// A province without cities yields one record with empty city and district; a
// city without districts yields one record with an empty district; otherwise
// one record is produced per district. The result is never nil, and its order
// follows map iteration and is therefore unspecified.
func splitRegionsInfos(infos map[string]map[string]map[string]string) []map[string]string {
	flat := []map[string]string{}
	for province, cities := range infos {
		if len(cities) == 0 {
			flat = append(flat, map[string]string{"area": province, "city": "", "district": ""})
			continue
		}
		for cityName, districts := range cities {
			if len(districts) == 0 {
				flat = append(flat, map[string]string{"area": province, "city": cityName, "district": ""})
				continue
			}
			for districtName := range districts {
				flat = append(flat, map[string]string{"area": province, "city": cityName, "district": districtName})
			}
		}
	}
	return flat
}
- //日志流程记录~组级别
- func LogProcessRecordingForGroupInfo(key string, valueArr []string, fieldInfos map[string]interface{}, groupInfos map[string]map[string]map[string]string, finallyInfos map[string]map[string]map[string]string, logRecordInfo *[]map[string]interface{}) {
- groupArr := splitRegionsInfos(groupInfos)
- finalluArr := splitRegionsInfos(finallyInfos)
- data := map[string]interface{}{
- key + "_value": valueArr,
- key + "_group": groupArr,
- "finally_region": finalluArr,
- }
- for k, v := range fieldInfos {
- data[k] = v
- }
- *logRecordInfo = append(*logRecordInfo, data)
- }
- //日志流程记录~初步
- func LogProcessRecordingForTentative(key string, valueArr interface{}, finallyInfos map[string]map[string]map[string]string, logRecordInfo *[]map[string]interface{}) {
- finallyArr := splitRegionsInfos(finallyInfos)
- data := map[string]interface{}{
- key + "_value": valueArr,
- "finally_region": finallyArr,
- }
- *logRecordInfo = append(*logRecordInfo, data)
- }
- //同组合并的地域数据
- //func MergeGroupRegionInfo(datas_1, datas_2 map[string]map[string]map[string]string) map[string]map[string]map[string]string {
- // regions := map[string]map[string]map[string]string{}
- // if len(datas_1) > 0 && len(datas_2) == 0 {
- // return datas_1
- // }
- // if len(datas_2) > 0 && len(datas_1) == 0 {
- // return datas_2
- // }
- // for k, v := range datas_1 {
- // area, city, district := "", "", ""
- // area = k
- // if len(v) > 0 {
- // for k1, v1 := range v {
- // city = k1
- // if len(v1) > 0 {
- // for k2, _ := range v1 {
- // district = k2
- // UpdateRegionsInfo(area, city, district, &regions)
- // }
- // } else {
- // UpdateRegionsInfo(area, city, district, &regions)
- // }
- // }
- // } else {
- // UpdateRegionsInfo(area, city, district, &regions)
- // }
- // }
- //
- // for k, v := range datas_2 {
- // area, city, district := "", "", ""
- // area = k
- // if len(v) > 0 {
- // for k1, v1 := range v {
- // city = k1
- // if len(v1) > 0 {
- // for k2, _ := range v1 {
- // district = k2
- // UpdateRegionsInfo(area, city, district, &regions)
- // }
- // } else {
- // UpdateRegionsInfo(area, city, district, &regions)
- // }
- // }
- // } else {
- // UpdateRegionsInfo(area, city, district, &regions)
- // }
- // }
- // return regions
- //}
|