get_region.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. import os
  2. import cpca
  3. import re
  4. import pandas as pd
  5. def get_city_info(text):
  6. # 获取当前脚本所在目录的上一级目录
  7. current_dir = os.path.dirname(__file__)
  8. parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
  9. # 读取区县数据
  10. df_county_addr = os.path.join(parent_dir, 'docs', '区县.xlsx')
  11. df_county=pd.read_excel(df_county_addr)
  12. # 读取乡镇数据
  13. df_town_addr = os.path.join(parent_dir, 'docs', '乡镇.xlsx')
  14. df_town = pd.read_excel(df_town_addr)
  15. # 读取市级数据
  16. df_city_addr = os.path.join(parent_dir, 'docs', '市.xlsx')
  17. df_city = pd.read_excel(df_city_addr)
  18. # 使用cpca库提取地名
  19. df = cpca.transform([text])
  20. province = None
  21. city = None
  22. district = None
  23. if not df.empty:
  24. province = df.iloc[0]['省']
  25. city = df.iloc[0]['市']
  26. district = df.iloc[0]['区']
  27. if province is None and city is None and district is None:
  28. # 使用正则表达式提取乡镇信息
  29. towns = re.findall(r'[\u4e00-\u9fa5]+镇|[\u4e00-\u9fa5]+乡|[\u4e00-\u9fa5]+街道|[\u4e00-\u9fa5]+庄|[\u4e00-\u9fa5]+营|[\u4e00-\u9fa5]+店', text)
  30. if towns:
  31. for town in towns:
  32. town_name = None
  33. if town in df_town['乡镇名称'].values or town in df_town['乡镇简称'].values:
  34. town_name = town
  35. # 根据乡镇名查找对应的区县代码
  36. town_info = df_town[df_town['乡镇名称'] == town_name].iloc[0]
  37. county_code = town_info['区县代码']
  38. # 根据区县代码查找对应的区县名称
  39. county_info = df_county[df_county['区县代码'] == county_code].iloc[0]
  40. county_name = county_info['区县名称']
  41. # 将区县名称转换成对应的省份名称
  42. province = county_name
  43. # 尝试在市级数据中查找对应的市信息
  44. city_code = county_info['城市代码']
  45. city_info = df_city[df_city['城市代码'] == city_code]
  46. if not city_info.empty:
  47. city = city_info.iloc[0]['城市名称']
  48. # 将城市名称转换成对应的省份名称
  49. df_city_result = cpca.transform([city])
  50. province = df_city_result.iloc[0]['省']
  51. break# 找到乡镇信息后跳出循环
  52. else:
  53. continue
  54. return province, city, district
  55. if __name__ == '__main__':
  56. # 使用方法示例
  57. province, city, district = get_city_info("杞县文化广电新闻出版旅游局")
  58. if province==None or city==None or district==None:
  59. print("44444")
  60. print(province, city, district)