extractcity_way.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. package extract
  2. import (
  3. qu "qfw/util"
  4. )
  // ConfirmUniqueRegionInfo makes the final decision on whether regions pins
  // down exactly one area/city/district triple.
  //
  // When regions holds exactly one area, *area is overwritten with it. If that
  // area holds exactly one city, *city is overwritten, and if that city holds
  // exactly one district, *district is overwritten. A level that is not unique
  // keeps whatever value the caller passed in. When regions does not hold
  // exactly one area, nothing is written and false is returned.
  //
  // The result is true only when *area, *city and *district are all non-empty
  // after the update.
  func ConfirmUniqueRegionInfo(regions map[string]map[string]map[string]string, area *string, city *string, district *string) bool {
  	if len(regions) != 1 {
  		return false
  	}
  	// Each loop below runs over a single entry; break stops the descent as
  	// soon as a level turns out to be ambiguous or empty.
  	for province, cities := range regions {
  		*area = province
  		if len(cities) != 1 {
  			break
  		}
  		for cityName, districts := range cities {
  			*city = cityName
  			if len(districts) != 1 {
  				break
  			}
  			for districtName := range districts {
  				*district = districtName
  			}
  		}
  	}
  	return *area != "" && *city != "" && *district != ""
  }
  // CompleteRegionInfo fills in the remaining region fields in place.
  //
  // For the areas 北京, 天津, 上海 and 重庆 the city is forced to the matching
  // "…市" name. An empty area is replaced by the nationwide marker "全国" and
  // city and district are cleared. Any other area leaves all three values
  // untouched.
  func CompleteRegionInfo(area *string, city *string, district *string) {
  	switch *area {
  	case "北京":
  		*city = "北京市"
  		// Special case (Beijing Chaoyang Integrated Traditional Chinese and
  		// Western Medicine Emergency Rescue Center: 5a84079740d2d9bbe88bad90).
  		if *district == "北京朝阳" {
  			*district = "朝阳区"
  		}
  	case "天津":
  		*city = "天津市"
  	case "上海":
  		*city = "上海市"
  	case "重庆":
  		*city = "重庆市"
  	case "":
  		*area = "全国"
  		*city = ""
  		*district = ""
  	}
  }
  48. //根据词获取所有的地域
  49. func (e *ExtractTask) takeRegionsFromWords(text string) []map[string]string {
  50. regions := []map[string]string{}
  51. //全称匹配
  52. for pos_full, trie_full := range e.Trie_Fulls {
  53. if trie_full.Get(text) {
  54. if pos_full == 0 {
  55. if province := e.ProvinceMap[text]; province != "" {
  56. regions = append(regions, map[string]string{"area": province, "city": "", "district": ""})
  57. }
  58. } else if pos_full == 1 {
  59. if data := e.CityFullMap[text]; data != nil {
  60. if data.P.Brief != "" && data.Name != "" {
  61. regions = append(regions, map[string]string{"area": data.P.Brief, "city": data.Name, "district": ""})
  62. }
  63. }
  64. } else if pos_full == 2 {
  65. citys := e.DistrictCityMap[text]
  66. for _, c := range citys {
  67. if c.P.Brief != "" && c.Name != "" && text != "" {
  68. regions = append(regions, map[string]string{"area": c.P.Brief, "city": c.Name, "district": text})
  69. }
  70. }
  71. }
  72. }
  73. }
  74. //简称匹配
  75. for pos_sim, trie_sim := range e.Trie_Sims {
  76. if trie_sim.Get(text) {
  77. if pos_sim == 0 {
  78. if text != "" {
  79. regions = append(regions, map[string]string{"area": text, "city": "", "district": ""})
  80. }
  81. } else if pos_sim == 1 {
  82. if csMap := e.CityBriefMap[text]; csMap != nil {
  83. if csMap.P.Brief != "" && csMap.Name != "" {
  84. regions = append(regions, map[string]string{"area": csMap.P.Brief, "city": csMap.Name, "district": ""})
  85. }
  86. }
  87. } else if pos_sim == 2 {
  88. citysArr := e.DistrictSimAndAll[text]
  89. for _, full_citys := range citysArr {
  90. for district, c := range full_citys {
  91. if c == nil || c.P == nil || c.Name == "" {
  92. continue
  93. }
  94. if c.P.Brief != "" && c.Name != "" && district != "" {
  95. regions = append(regions, map[string]string{"area": c.P.Brief, "city": c.Name, "district": district})
  96. }
  97. }
  98. }
  99. }
  100. }
  101. }
  102. return regions
  103. }
  104. //文本取地域 from 1~jsondata文本 2~其他文本
  105. func (e *ExtractTask) GetRegionFromText(text string, regions *map[string]map[string]map[string]string, from int) []map[string]interface{} {
  106. regionValues := []map[string]interface{}{}
  107. if text == "" {
  108. return regionValues
  109. }
  110. wordsArr := []string{}
  111. if from == 1 {
  112. wordsArr = e.Seg_PCD.Cut(text, true)
  113. } else if from == 2 {
  114. wordsArr = e.Seg_SV.Cut(text, true)
  115. }
  116. for _, word := range wordsArr {
  117. regionArr := e.takeRegionsFromWords(word)
  118. for _, v := range regionArr {
  119. area := qu.ObjToString(v["area"])
  120. city := qu.ObjToString(v["city"])
  121. district := qu.ObjToString(v["district"])
  122. UpdateRegionsInfo(area, city, district, regions)
  123. regionValues = append(regionValues, map[string]interface{}{"area": area, "city": city, "district": district})
  124. }
  125. }
  126. return regionValues
  127. }
  // UpdateRegionsInfo merges one area/city/district triple into the nested
  // region tree *regions (area -> city -> district -> district).
  //
  //   - The area key is always created if missing, even when city is "".
  //   - The city key is created only when city is non-empty.
  //   - The district is recorded only when both city and district are
  //     non-empty; a district without a city is dropped.
  //
  // Existing entries are kept, so repeated calls accumulate. If *regions is a
  // nil map it is initialised first instead of panicking on the write; a nil
  // regions pointer is ignored because there is nowhere to store the result.
  func UpdateRegionsInfo(area, city, district string, regions *map[string]map[string]map[string]string) {
  	if regions == nil {
  		return
  	}
  	if *regions == nil {
  		*regions = map[string]map[string]map[string]string{}
  	}

  	cities := (*regions)[area]
  	if cities == nil {
  		cities = map[string]map[string]string{}
  		(*regions)[area] = cities
  	}
  	if city == "" {
  		return
  	}

  	districts := cities[city]
  	if districts == nil {
  		districts = map[string]string{}
  		cities[city] = districts
  	}
  	if district != "" {
  		districts[district] = district
  	}
  }
  // ReasonableGroupRegionInfo sanity-checks a merged group of regions and
  // strips levels that are too ambiguous to be trusted.
  //
  //   - No area, or more than two areas: the whole group is invalid and an
  //     empty tree is returned.
  //   - More than three cities in total: every area that does not hold exactly
  //     one city loses all its cities. If the surviving cities still carry
  //     more than three districts in total, the district rule below is applied
  //     on top.
  //   - Otherwise, more than three districts in total: every city that does
  //     not hold exactly one district loses all its districts.
  //   - Otherwise datas is returned unchanged (the same map, not a copy).
  func ReasonableGroupRegionInfo(datas map[string]map[string]map[string]string) map[string]map[string]map[string]string {
  	if n := len(datas); n == 0 || n > 2 {
  		return map[string]map[string]map[string]string{}
  	}

  	cityTotal, districtTotal := 0, 0
  	for _, cities := range datas {
  		cityTotal += len(cities)
  		for _, districts := range cities {
  			districtTotal += len(districts)
  		}
  	}

  	switch {
  	case cityTotal > 3:
  		pruned := make(map[string]map[string]map[string]string, len(datas))
  		remaining := 0 // districts left after the city-level pruning
  		for area, cities := range datas {
  			if len(cities) != 1 {
  				pruned[area] = map[string]map[string]string{}
  				continue
  			}
  			pruned[area] = cities
  			for _, districts := range cities {
  				remaining += len(districts)
  			}
  		}
  		if remaining > 3 {
  			return keepSoleDistricts(pruned)
  		}
  		return pruned
  	case districtTotal > 3:
  		return keepSoleDistricts(datas)
  	}
  	return datas
  }

  // keepSoleDistricts returns a new tree in which every city keeps its
  // district map only when it holds exactly one district; all other cities
  // get an empty district map.
  func keepSoleDistricts(src map[string]map[string]map[string]string) map[string]map[string]map[string]string {
  	out := make(map[string]map[string]map[string]string, len(src))
  	for area, cities := range src {
  		kept := make(map[string]map[string]string, len(cities))
  		for city, districts := range cities {
  			if len(districts) == 1 {
  				kept[city] = districts
  			} else {
  				kept[city] = map[string]string{}
  			}
  		}
  		out[area] = kept
  	}
  	return out
  }
  217. //两组比对~找寻补充,排除数据
  218. func AnalysisIsUniqueInfo(regions map[string]map[string]map[string]string, all_regions *map[string]map[string]map[string]string) {
  219. if len(regions) == 0 {
  220. return
  221. }
  222. if len(*all_regions) == 0 {
  223. *all_regions = regions
  224. return
  225. }
  226. regionsArr := splitRegionsInfos(regions) //目标数据
  227. all_regionsArr := splitRegionsInfos(*all_regions) //源数据
  228. new_all_regionsArr := []map[string]string{} //新数据
  229. for _, info := range regionsArr {
  230. area := qu.ObjToString(info["area"])
  231. if (*all_regions)[area] == nil {
  232. continue
  233. }
  234. unmatchInfo1 := ScreenOutReasonableRegionInfo(info, &all_regionsArr, &new_all_regionsArr)
  235. if unmatchInfo1 != nil { //降级匹配~最多二级
  236. unmatchInfo2 := ScreenOutReasonableRegionInfo(unmatchInfo1, &all_regionsArr, &new_all_regionsArr)
  237. if unmatchInfo2 != nil { //降级匹配~最多一级
  238. ScreenOutReasonableRegionInfo(unmatchInfo2, &all_regionsArr, &new_all_regionsArr)
  239. }
  240. }
  241. }
  242. //根据最新有效地域组~重新构建所有信息
  243. reset_regions_infos := map[string]map[string]map[string]string{}
  244. if len(new_all_regionsArr) > 0 {
  245. for _, v := range new_all_regionsArr {
  246. area := qu.ObjToString(v["area"])
  247. city := qu.ObjToString(v["city"])
  248. district := qu.ObjToString(v["district"])
  249. UpdateRegionsInfo(area, city, district, &reset_regions_infos)
  250. }
  251. *all_regions = reset_regions_infos
  252. }
  253. }
  254. //选取规则方法
  255. func ScreenOutReasonableRegionInfo(info map[string]string, regions_infosArr *[]map[string]string, new_regions_infosArr *[]map[string]string) map[string]string {
  256. area := qu.ObjToString(info["area"])
  257. city := qu.ObjToString(info["city"])
  258. district := qu.ObjToString(info["district"])
  259. is_Exist := false
  260. for _, s := range *regions_infosArr {
  261. s_area := qu.ObjToString(s["area"])
  262. s_city := qu.ObjToString(s["city"])
  263. s_district := qu.ObjToString(s["district"])
  264. if s_area == area && s_city == city && s_district == district {
  265. is_Exist = true
  266. *new_regions_infosArr = append(*new_regions_infosArr, info)
  267. } else {
  268. //判断是否为补充原则
  269. if area != "" && city != "" && district != "" { //3级补2级 3级补2级
  270. if s_area == area && s_city == city && s_district == "" {
  271. is_Exist = true
  272. *new_regions_infosArr = append(*new_regions_infosArr, info)
  273. } else if s_area == area && s_city == "" && s_district == "" {
  274. is_Exist = true
  275. *new_regions_infosArr = append(*new_regions_infosArr, info)
  276. }
  277. } else if area != "" && city != "" && district == "" { //2级补1级
  278. if s_area == area && s_city == "" {
  279. is_Exist = true
  280. *new_regions_infosArr = append(*new_regions_infosArr, info)
  281. }
  282. }
  283. }
  284. }
  285. //若没有补充~针对二三级 再次进行去掉末位一级,再次进行一轮比对
  286. if !is_Exist {
  287. if area != "" && city != "" && district != "" {
  288. return map[string]string{"area": area, "city": city, "district": ""}
  289. }
  290. if area != "" && city != "" && district == "" {
  291. return map[string]string{"area": area, "city": "", "district": ""}
  292. }
  293. }
  294. return nil
  295. }
  // splitRegionsInfos flattens the nested area -> city -> district tree into
  // a list of rows with the keys "area", "city" and "district", which is
  // easier to merge and select from.
  //
  // An area without cities yields one row with empty city and district; a
  // city without districts yields one row with an empty district; otherwise
  // one row per district is produced. Row order follows map iteration and is
  // therefore not stable. The returned slice is never nil.
  func splitRegionsInfos(infos map[string]map[string]map[string]string) []map[string]string {
  	rows := []map[string]string{}
  	emit := func(area, city, district string) {
  		rows = append(rows, map[string]string{"area": area, "city": city, "district": district})
  	}
  	for area, cities := range infos {
  		if len(cities) == 0 {
  			emit(area, "", "")
  			continue
  		}
  		for city, districts := range cities {
  			if len(districts) == 0 {
  				emit(area, city, "")
  				continue
  			}
  			for district := range districts {
  				emit(area, city, district)
  			}
  		}
  	}
  	return rows
  }
  316. //日志流程记录~组级别
  317. func LogProcessRecordingForGroupInfo(key string, valueArr []string, fieldInfos map[string]interface{}, groupInfos map[string]map[string]map[string]string, finallyInfos map[string]map[string]map[string]string, logRecordInfo *[]map[string]interface{}) {
  318. groupArr := splitRegionsInfos(groupInfos)
  319. finalluArr := splitRegionsInfos(finallyInfos)
  320. data := map[string]interface{}{
  321. key + "_value": valueArr,
  322. key + "_group": groupArr,
  323. "finally_region": finalluArr,
  324. }
  325. for k, v := range fieldInfos {
  326. data[k] = v
  327. }
  328. *logRecordInfo = append(*logRecordInfo, data)
  329. }
  330. //日志流程记录~初步
  331. func LogProcessRecordingForTentative(key string, valueArr interface{}, finallyInfos map[string]map[string]map[string]string, logRecordInfo *[]map[string]interface{}) {
  332. finallyArr := splitRegionsInfos(finallyInfos)
  333. data := map[string]interface{}{
  334. key + "_value": valueArr,
  335. "finally_region": finallyArr,
  336. }
  337. *logRecordInfo = append(*logRecordInfo, data)
  338. }
  339. //同组合并的地域数据
  340. //func MergeGroupRegionInfo(datas_1, datas_2 map[string]map[string]map[string]string) map[string]map[string]map[string]string {
  341. // regions := map[string]map[string]map[string]string{}
  342. // if len(datas_1) > 0 && len(datas_2) == 0 {
  343. // return datas_1
  344. // }
  345. // if len(datas_2) > 0 && len(datas_1) == 0 {
  346. // return datas_2
  347. // }
  348. // for k, v := range datas_1 {
  349. // area, city, district := "", "", ""
  350. // area = k
  351. // if len(v) > 0 {
  352. // for k1, v1 := range v {
  353. // city = k1
  354. // if len(v1) > 0 {
  355. // for k2, _ := range v1 {
  356. // district = k2
  357. // UpdateRegionsInfo(area, city, district, &regions)
  358. // }
  359. // } else {
  360. // UpdateRegionsInfo(area, city, district, &regions)
  361. // }
  362. // }
  363. // } else {
  364. // UpdateRegionsInfo(area, city, district, &regions)
  365. // }
  366. // }
  367. //
  368. // for k, v := range datas_2 {
  369. // area, city, district := "", "", ""
  370. // area = k
  371. // if len(v) > 0 {
  372. // for k1, v1 := range v {
  373. // city = k1
  374. // if len(v1) > 0 {
  375. // for k2, _ := range v1 {
  376. // district = k2
  377. // UpdateRegionsInfo(area, city, district, &regions)
  378. // }
  379. // } else {
  380. // UpdateRegionsInfo(area, city, district, &regions)
  381. // }
  382. // }
  383. // } else {
  384. // UpdateRegionsInfo(area, city, district, &regions)
  385. // }
  386. // }
  387. // return regions
  388. //}