123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101 |
- # coding:utf-8
- import re
- from a2s.a2s_client import a2s_execute
- from a2s.tools import json_serialize, json_deserialize
- from loguru import logger
- from config import daili
- from utils.request_fun import top_t
def start(data: dict):
    """Send ``data`` to the ``title_ner`` service and return the decoded reply.

    ``data`` is serialized once with ``json_serialize`` and handed to
    ``a2s_execute`` up to five times; an attempt is repeated only when
    ``a2s_execute`` returns ``None``. The first non-``None`` reply is passed
    through ``json_deserialize`` and returned.

    Any exception is logged and ends the call without further attempts.
    Every failure path (an exception, or all attempts returning ``None``)
    yields an empty dict, so a failed call never hands callers ``None`` or a
    raw, undecoded payload.

    :param data: request body, e.g. ``{"text": "..."}``.
    :return: the deserialized service reply, or ``{}`` on failure.
    """
    # SSL is not used for this call, so the channel is insecure.
    retry = 5
    try:
        # The payload is identical for every attempt; serialize it only once.
        bytes_data = json_serialize(data)
        for _ in range(retry):
            # NOTE(review): 60 is presumably a timeout in seconds and `daili`
            # the service address from config -- confirm against a2s_execute.
            raw = a2s_execute(daili, 'title_ner', 60, bytes_data)
            if raw is None:
                # No reply for this attempt; try again.
                continue
            return json_deserialize(raw)
    except Exception as e:
        # Best-effort call: log and fall through to the empty result.
        logger.info(str(e))
    return {}
def title_topic_merge(text):
    """
    Extract topic information from a title, merging multiple targets.

    Every character that is neither a word character nor whitespace is
    removed from ``text``; the cleaned text is printed and sent to ``start``.
    For each entry of the reply's ``'result'`` list, the first element of
    every ``'TARGET'`` item is concatenated; the value computed for the last
    entry wins. A result that is exactly '建设' is discarded.

    :return: ``(topic, flag)``; ``flag`` is always the empty string here.
    """
    cleaned = re.sub(r'[^\w\s]', '', text)
    print(cleaned)
    response = start({"text": cleaned})

    merged = ''
    marker = ''
    if not response:
        # Nothing came back from the service: empty topic, empty flag.
        return merged, marker

    for entry in response.get('result', []):
        targets = entry.get('TARGET', [])
        merged = ''.join(item[0] for item in targets)

    # '建设' on its own is not accepted as a topic.
    if merged == '建设':
        merged = ''
    return merged, marker
def title_topic_process(text,):
    """
    Extract topic information from a title.

    The text sent to ``start`` has every character that is neither a word
    character nor whitespace removed, and then every '定点' removed. When
    the original ``text`` contains '项目' at least twice, the first elements
    of all ``'TARGET'`` items of a result entry are concatenated; otherwise
    the first element of the last ``'TARGET'`` item is taken.

    :return: ``(topic, flag)``. ``flag`` is ``'title'`` when no usable topic
        was found and ``text`` itself is returned; otherwise it is ``'ner'``.
    """
    stripped = re.sub(r'[^\w\s]', '', text)
    payload = {"text": stripped.replace('定点', '')}
    multi_project = len(re.findall(r'项目', text)) >= 2

    response = start(payload)
    topic, source = '', ''
    if response:
        # NOTE(review): nesting was reconstructed from a source that lost its
        # indentation -- confirm that the flag is set once per result entry.
        for entry in response.get('result', []):
            candidates = entry.get('TARGET', [])
            if multi_project:
                topic = ''.join(candidate[0] for candidate in candidates)
            else:
                # Keep the last candidate; an empty list leaves `topic` as is.
                for candidate in candidates:
                    topic = candidate[0]
            source = 'ner'

    # '建设' on its own is not accepted as a topic.
    if topic == '建设':
        topic = ''

    if topic:
        return topic, source
    # Nothing usable was extracted: fall back to the raw input text.
    return text, 'title'
def topic_trace(title,projectname):
    """
    Extract the trunk (core topic) phrase for a notice.

    Resolution order:

    1. If ``projectname`` contains '采购意向', ``title`` is returned as is.
    2. If ``title`` contains '采购意向' or '...' and a project name is given,
       the topic is extracted from ``projectname``; otherwise it is extracted
       from ``title``, and retried on ``projectname`` when extraction merely
       echoed the title back.
    3. If the topic is still empty, ``top_t(title)``, then
       ``top_t(projectname)``, then ``title`` itself are tried in turn.

    Characters that are neither word characters nor whitespace are stripped
    from the result of steps 2-3 before it is returned.

    :param title: notice title.
    :param projectname: project name; may be empty or ``None``.
    :return: the extracted topic string.
    """
    # The checks below already treat a falsy project name as "not provided",
    # but a membership test on None raises TypeError -- normalise it first.
    if projectname is None:
        projectname = ''

    if '采购意向' in projectname:
        return title

    if ('采购意向' in title or '...' in title) and projectname:
        title_topic, flag = title_topic_process(projectname)
    else:
        title_topic, flag = title_topic_process(title)
        # The title coming back unchanged means extraction fell back to its
        # input; try the project name instead when there is one.
        if title_topic == title and projectname:
            title_topic, flag = title_topic_process(projectname)

    # Fallback chain; each step runs only while the topic is still empty.
    if not title_topic:
        title_topic = top_t(title)
    if not title_topic:
        title_topic = top_t(projectname)
    if not title_topic:
        title_topic = title

    title_topic = re.sub(r'[^\w\s]', '', title_topic)
    return title_topic
if __name__ == '__main__':
    # Manual smoke test: query the service directly, then run the full
    # trunk-word extraction using the same string as title and project name.
    sample_title = " 广州公司-(珠海)智慧能源-显示屏-2312(急)变更公告"
    raw_reply = start({"text": sample_title})
    trunk = topic_trace(sample_title, sample_title)
    print(trunk)
    print(raw_reply)
|