title_ner.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. # coding:utf-8
  2. import re
  3. from a2s.a2s_client import a2s_execute
  4. from a2s.tools import json_serialize, json_deserialize
  5. from loguru import logger
  6. from config import daili
  7. from utils.request_fun import top_t
  8. def start(data: dict):
  9. # 本次不使用SSL,所以channel是不安全的
  10. result = {}
  11. try:
  12. retry = 5
  13. for r in range(retry):
  14. bytes_data = json_serialize(data)
  15. result = a2s_execute(daili, 'title_ner', 60, bytes_data)
  16. if result is None:
  17. continue
  18. result = json_deserialize(result)
  19. return result
  20. except Exception as e:
  21. logger.info(str(e))
  22. return result
  23. def title_topic_merge(text):
  24. """
  25. 标题信息抽取,合并多标的物查询
  26. """
  27. tet = re.sub(r'[^\w\s]', '', text)
  28. print(tet)
  29. input_text = {"text": tet}
  30. res = start(input_text)
  31. topic_res = ''
  32. flag = ''
  33. if res:
  34. res_list = res.get('result',[])
  35. for i in res_list:
  36. target = i.get('TARGET', [])
  37. topic_res = ''.join([topic[0] for topic in target])
  38. if topic_res in ['建设']:
  39. topic_res = ''
  40. return topic_res,flag
  41. def title_topic_process(text,):
  42. """
  43. 标题信息抽取
  44. """
  45. input_text = {"text": re.sub(r'[^\w\s]', '', text).replace('定点','')}
  46. pattern = r'项目'
  47. count_re = len(re.findall(pattern, text))
  48. res = start(input_text)
  49. topic_res = ''
  50. flag = ''
  51. if res:
  52. res_list = res.get('result',[])
  53. for i in res_list:
  54. target = i.get('TARGET', [])
  55. if count_re >=2:
  56. topic_res = ''.join([topic[0] for topic in target])
  57. else:
  58. for j in target:
  59. topic_res = j[0]
  60. flag = 'ner'
  61. if topic_res in ['建设']:
  62. topic_res = ''
  63. if not topic_res:
  64. topic_res = text
  65. flag = 'title'
  66. return topic_res,flag
  67. def topic_trace(title,projectname):
  68. """
  69. 主干词抽取
  70. """
  71. if '采购意向' in projectname:
  72. return title
  73. if ('采购意向' in title or '...' in title) and projectname:
  74. title_topic, flag = title_topic_process(projectname)
  75. else:
  76. title_topic, flag = title_topic_process(title)
  77. if title_topic == title and projectname:
  78. title_topic, flag = title_topic_process(projectname)
  79. if not title_topic:
  80. title_topic = top_t(title)
  81. if not title_topic:
  82. title_topic = top_t(projectname)
  83. if not title_topic:
  84. title_topic = title
  85. title_topic = re.sub(r'[^\w\s]', '', title_topic)
  86. return title_topic
  87. if __name__ == '__main__':
  88. data = " 广州公司-(珠海)智慧能源-显示屏-2312(急)变更公告"
  89. r = start({"text": data})
  90. print(topic_trace(data, data))
  91. print(r)