convertTxtServer.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. # coding:utf-8
  2. from servicerd.client import RdClient
  3. import grpc
  4. from proto import wordToPdf_pb2_grpc
  5. from proto import wordToPdf_pb2
  6. from loguru import logger
  7. MAX_MESSAGE_LENGTH = 256 * 1024 * 1024
  8. RD = RdClient(rd_server="192.168.3.12:10021",
  9. service_name="Pdf2Txt",
  10. balance_type=3)
  11. def read_into_buffer(filename):
  12. try:
  13. with open(filename, 'rb') as f:
  14. buf = f.read()
  15. f.close()
  16. return True, buf
  17. except Exception as e:
  18. logger.warning(e)
  19. return False, ""
  20. @RD.fn_wrap
  21. def call_say(requestFile: list, **kwargs):
  22. '''
  23. :param requestFile:
  24. :param kwargs:
  25. :return:
  26. '''
  27. address = '{}:{}'.format(kwargs['ip'], kwargs['port'])
  28. WordFileName = requestFile[0]
  29. Word = requestFile[1]
  30. print(address, "--->")
  31. try:
  32. with grpc.insecure_channel(address, options=[
  33. ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
  34. ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
  35. ]) as channel:
  36. stub = wordToPdf_pb2_grpc.AnalysisDocumentsStub(channel)
  37. response = stub.Extract(wordToPdf_pb2.ParseRequest(WordFileName=WordFileName,
  38. Word=Word))
  39. return response
  40. except Exception as e:
  41. print(e)
  42. return None
  43. def convert_txt_start(pdf_file_path: str, suffix: str = "pdf")->(bool,str):
  44. '''
  45. 转换pdf开始
  46. :param file_path:
  47. :param suffix:
  48. :return:
  49. '''
  50. state, word = read_into_buffer(pdf_file_path)
  51. if not state:
  52. return False, ""
  53. request_ret = call_say(["0.%s" % suffix, word])
  54. if not (request_ret and request_ret.State):
  55. return False, ""
  56. bytedata = request_ret.Pdf
  57. try:
  58. strdata = bytedata.decode("gbk","ignore").encode("utf-8").decode("utf-8")
  59. except Exception as e:
  60. print(e)
  61. strdata = bytedata.decode("utf-8")
  62. return True, strdata
  63. if __name__ == '__main__':
  64. from util.file_operations import save_file
  65. st, request_ret = convert_txt_start("../data/0.docx", ".docx")
  66. print(request_ret)
  67. # state = save_file(request_ret, text_path)
  68. # print(state)