1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980 |
- # coding:utf-8
from typing import Tuple

import grpc
from loguru import logger

from proto import wordToPdf_pb2
from proto import wordToPdf_pb2_grpc
from servicerd.client import RdClient
# Size cap (256 MiB) used for both the gRPC send and receive message limits.
MAX_MESSAGE_LENGTH = 256 * 1024 * 1024

# Shared client for the "Pdf2Txt" service; its ``fn_wrap`` decorator is
# applied to ``call_say`` below.
RD = RdClient(
    rd_server="192.168.3.12:10021",
    service_name="Pdf2Txt",
    balance_type=3,
)
def read_into_buffer(filename):
    """Read the whole content of a file in binary mode.

    :param filename: path of the file to read.
    :return: ``(True, data)`` with the file content as ``bytes`` on success;
        ``(False, "")`` when opening or reading raised (the error is logged
        as a warning and not re-raised).
    """
    try:
        # The ``with`` block closes the file on exit, so no explicit
        # ``close()`` call is needed.
        with open(filename, 'rb') as f:
            return True, f.read()
    except Exception as e:
        # Deliberate best-effort: callers only inspect the boolean flag.
        logger.warning(e)
        return False, ""
@RD.fn_wrap
def call_say(requestFile: list, **kwargs):
    """Send one document to the remote ``AnalysisDocuments.Extract`` RPC.

    :param requestFile: sequence whose item 0 is sent as ``WordFileName`` and
        whose item 1 is sent as ``Word`` (the file content).
    :param kwargs: must contain ``ip`` and ``port`` of the target server.
        NOTE(review): presumably injected by ``RD.fn_wrap`` -- the caller in
        this file passes neither; confirm against the servicerd client.
    :return: the RPC response, or ``None`` if the call raised.
    """
    address = '{}:{}'.format(kwargs['ip'], kwargs['port'])
    word_file_name = requestFile[0]
    word = requestFile[1]
    logger.debug("{} --->", address)

    channel_options = [
        ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
        ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
    ]
    try:
        with grpc.insecure_channel(address, options=channel_options) as channel:
            stub = wordToPdf_pb2_grpc.AnalysisDocumentsStub(channel)
            request = wordToPdf_pb2.ParseRequest(
                WordFileName=word_file_name,
                Word=word,
            )
            return stub.Extract(request)
    except Exception:
        # Best-effort boundary: callers treat ``None`` as "request failed".
        # Log through the module logger (with traceback) rather than print.
        logger.exception("Extract RPC to {} failed", address)
        return None
def convert_txt_start(pdf_file_path: str, suffix: str = "pdf") -> Tuple[bool, str]:
    """Upload a local file to the extraction service and return its text.

    :param pdf_file_path: path of the local file to send.
    :param suffix: file extension announced to the service; a leading dot
        (``".docx"``) is tolerated and stripped.
    :return: ``(True, text)`` on success; ``(False, "")`` when the file cannot
        be read, the RPC returns nothing, or the response's ``State`` is falsy.
    """
    state, word = read_into_buffer(pdf_file_path)
    if not state:
        return False, ""

    # The payload travels under the placeholder name "0.<suffix>". Strip any
    # leading dots so that suffix=".docx" yields "0.docx", not "0..docx".
    request_name = "0.{}".format(suffix.lstrip("."))
    request_ret = call_say([request_name, word])
    if not (request_ret and request_ret.State):
        return False, ""

    # With errors="ignore" the GBK decode cannot raise for bytes input, so the
    # former UTF-8 fallback was unreachable, and re-encoding/decoding the
    # result as UTF-8 was a no-op. The decoded text is unchanged.
    # NOTE(review): this assumes the service answers in GBK -- confirm.
    return True, request_ret.Pdf.decode("gbk", "ignore")
if __name__ == '__main__':
    # Manual smoke run: convert a sample document and print the result.
    from util.file_operations import save_file

    ok, extracted_text = convert_txt_start("../data/0.docx", ".docx")
    print(extracted_text)
    # Optional persistence step, currently disabled:
    # state = save_file(extracted_text, text_path)
    # print(state)
|