# coding:utf-8
import re

from a2s.a2s_client import a2s_execute
from a2s.tools import json_serialize, json_deserialize
from loguru import logger

from config import daili
from utils.request_fun import top_t

# Matches every character that is neither a word character nor whitespace.
_PUNCT_RE = re.compile(r'[^\w\s]')
# NER outputs that are discarded, i.e. treated as "no topic found".
_IGNORED_TOPICS = ('建设',)


def start(data: dict):
    """Send *data* to the ``title_ner`` service and return the decoded reply.

    The payload is serialized once and the call is attempted up to five
    times; a ``None`` reply moves on to the next attempt. Any exception is
    logged and ends the attempts.

    Returns:
        The deserialized response, or an empty dict when every attempt
        returned ``None`` or an exception was raised. Callers can therefore
        rely on a falsy value meaning "no usable reply".
    """
    # SSL is not used here, so the channel is insecure.
    retry = 5
    try:
        bytes_data = json_serialize(data)
        for _ in range(retry):
            raw = a2s_execute(daili, 'title_ner', 60, bytes_data)
            if raw is None:
                continue
            return json_deserialize(raw)
    except Exception as e:
        logger.info(str(e))
    # Never hand back None or an undecoded reply: callers call ``.get`` on
    # any truthy result.
    return {}


def title_topic_merge(text):
    """Extract the title topic, joining all targets of an entry into one string.

    Punctuation is stripped from *text* before it is sent to the NER
    service. For each entry under ``result`` the first element of every
    ``TARGET`` item is concatenated; the last entry wins.

    Returns:
        ``(topic, flag)`` where ``flag`` is always the empty string.
    """
    cleaned = _PUNCT_RE.sub('', text)
    logger.debug(cleaned)
    res = start({"text": cleaned})
    topic_res = ''
    flag = ''
    if res:
        for item in res.get('result', []):
            target = item.get('TARGET', [])
            topic_res = ''.join(topic[0] for topic in target)
    if topic_res in _IGNORED_TOPICS:
        topic_res = ''
    return topic_res, flag


def title_topic_process(text,):
    """Extract the title topic, falling back to *text* itself.

    Punctuation and the substring ``定点`` are removed from *text* before it
    is sent to the NER service. When *text* contains ``项目`` at least twice,
    all targets of an entry are concatenated; otherwise the last target of
    the entry is used.

    Returns:
        ``(topic, 'ner')`` when the service produced a usable topic,
        otherwise ``(text, 'title')`` with the unmodified input.
    """
    query = _PUNCT_RE.sub('', text).replace('定点', '')
    multi_project = text.count('项目') >= 2
    res = start({"text": query})
    topic_res = ''
    if res:
        for item in res.get('result', []):
            target = item.get('TARGET', [])
            if multi_project:
                topic_res = ''.join(topic[0] for topic in target)
            else:
                for candidate in target:
                    topic_res = candidate[0]
    # NOTE(review): the original indentation was lost. The filter and the
    # fallback below are assumed to run even when the service gave no reply,
    # and the 'ner' flag is assumed to apply whenever NER produced the
    # topic - confirm against the version history.
    if topic_res in _IGNORED_TOPICS:
        topic_res = ''
    if not topic_res:
        return text, 'title'
    return topic_res, 'ner'


def topic_trace(title, projectname):
    """Extract the core term for a notice from its title / project name.

    Args:
        title: Notice title.
        projectname: Project name; ``None`` is treated as an empty string.

    Returns:
        *title* unchanged when *projectname* contains ``采购意向``; otherwise
        the extracted topic with punctuation stripped.
    """
    # The checks below already treat a falsy project name as "absent";
    # normalising up front keeps the membership test from raising on None.
    projectname = projectname or ''
    if '采购意向' in projectname:
        return title
    if ('采购意向' in title or '...' in title) and projectname:
        title_topic, _ = title_topic_process(projectname)
    else:
        title_topic, _ = title_topic_process(title)
        # Getting the title back means extraction fell back to the raw
        # text, so try the project name instead.
        if title_topic == title and projectname:
            title_topic, _ = title_topic_process(projectname)
    if not title_topic:
        title_topic = top_t(title)
    if not title_topic:
        title_topic = top_t(projectname)
    if not title_topic:
        title_topic = title
    return _PUNCT_RE.sub('', title_topic)


if __name__ == '__main__':
    data = " 广州公司-(珠海)智慧能源-显示屏-2312(急)变更公告"
    r = start({"text": data})
    print(topic_trace(data, data))
    print(r)