# coding:utf-8
"""Client helpers for sending a document to the ``Pdf2Txt`` gRPC service.

The module reads a local file, ships its bytes to a service instance via
``AnalysisDocumentsStub.Extract`` and decodes the returned payload to text.
"""
from typing import Tuple

from servicerd.client import RdClient
import grpc
from proto import wordToPdf_pb2_grpc
from proto import wordToPdf_pb2
from loguru import logger

# Upper bound (256 MiB) applied to both sent and received gRPC messages.
MAX_MESSAGE_LENGTH = 256 * 1024 * 1024

# Service-discovery client; ``call_say`` is wrapped with ``RD.fn_wrap``.
RD = RdClient(rd_server="192.168.3.12:10021", service_name="Pdf2Txt", balance_type=3)


def read_into_buffer(filename):
    """Read the whole file at *filename* in binary mode.

    :param filename: path of the file to read.
    :return: ``(True, data)`` on success, where ``data`` is the file content
        as ``bytes``; ``(False, "")`` if the file could not be read (the
        error is logged as a warning).
    """
    try:
        # The ``with`` statement closes the file; no explicit close needed.
        with open(filename, 'rb') as f:
            return True, f.read()
    except Exception as e:
        # Deliberately best-effort: callers rely on the status flag.
        logger.warning(e)
        return False, ""


@RD.fn_wrap
def call_say(requestFile: list, **kwargs):
    """Send one document to the service and return its raw response.

    :param requestFile: two-item sequence ``[file_name, file_bytes]``.
    :param kwargs: must contain ``ip`` and ``port`` of the target instance
        (presumably injected by ``RD.fn_wrap`` -- confirm against servicerd).
    :return: the response object from ``Extract``, or ``None`` if the call
        raised (the error is logged as a warning).
    """
    address = '{}:{}'.format(kwargs['ip'], kwargs['port'])
    word_file_name = requestFile[0]
    word = requestFile[1]
    logger.debug("{} --->", address)
    try:
        with grpc.insecure_channel(address, options=[
            ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
            ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
        ]) as channel:
            stub = wordToPdf_pb2_grpc.AnalysisDocumentsStub(channel)
            return stub.Extract(
                wordToPdf_pb2.ParseRequest(WordFileName=word_file_name, Word=word)
            )
    except Exception as e:
        logger.warning(e)
        return None


def convert_txt_start(pdf_file_path: str, suffix: str = "pdf") -> Tuple[bool, str]:
    """Convert the document at *pdf_file_path* to text via the service.

    :param pdf_file_path: path of the local file to convert.
    :param suffix: file extension reported to the service; a leading dot is
        accepted and ignored, so ``"docx"`` and ``".docx"`` are equivalent.
    :return: ``(True, text)`` on success, ``(False, "")`` if the file could
        not be read, the service call failed or reported a falsy ``State``,
        or the payload could not be decoded.
    """
    state, word = read_into_buffer(pdf_file_path)
    if not state:
        return False, ""

    # Strip leading dots so a suffix such as ".docx" does not produce "0..docx".
    request_ret = call_say(["0.%s" % suffix.lstrip("."), word])
    if not (request_ret and request_ret.State):
        return False, ""

    bytedata = request_ret.Pdf
    try:
        # errors="ignore" drops bytes that are not valid GBK instead of raising.
        strdata = bytedata.decode("gbk", "ignore")
    except Exception as e:
        logger.warning(e)
        try:
            strdata = bytedata.decode("utf-8")
        except Exception as fallback_error:
            # Report failure through the status flag rather than raising.
            logger.warning(fallback_error)
            return False, ""
    return True, strdata


if __name__ == '__main__':
    from util.file_operations import save_file

    st, request_ret = convert_txt_start("../data/0.docx", ".docx")
    print(request_ret)
    # state = save_file(request_ret, text_path)
    # print(state)