app.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. # coding:utf-8
  2. from util.file_operations import save_file
  3. from util.file_operations import generate_directory
  4. from util.file_operations import del_directory
  5. from util.attach_tools import download
  6. import file_processing
  7. import os
  8. import uuid
  9. from util.check_timeout import limit_decor
  10. from loguru import logger
  11. from proto import fileText_pb2
  12. from util.fs_client import FileServeClient
  13. from docs.config import error_number
  14. from file_processing import model_type
  15. from docs.config import oss_txt_config
  16. from docs.config import duplicatesRedisConfig
  17. from util.hash_file import cal_md5
  18. from util.redis_helper import RedisString
  # Module-wide file-service client built from oss_txt_config. FileApps uses it
  # to upload extracted text and to download text that was stored earlier.
  19. FS = FileServeClient(oss_txt_config)
  # Module-wide Redis string store built from duplicatesRedisConfig. FileApps
  # uses it to map a file's MD5 to the URL of its already-extracted text.
  20. Redis = RedisString(duplicatesRedisConfig)
  class FileApps(object):
      """Store one requested file locally, extract its text and build result messages.

      The request mapping is read with ``.get`` for these keys:

      * ``file_name`` / ``file_url`` / ``file_bytes`` / ``file_type`` - the file
        to process (``file_type`` is stored but not used inside this class).
      * ``return_type`` - ``0``: upload the text and return only its URL;
        ``2``: upload the text and return both the URL and the text; any other
        value: return the text inline without uploading.
      * ``extract_type`` - ``0`` or ``2``: fetch ``file_url`` with ``download``
        (called with mode ``1`` and ``2`` respectively); ``1``: persist
        ``file_bytes`` with ``save_file``; any other value is rejected.
      """

      def __init__(self, request):
          """Copy the request fields onto the instance, falling back to defaults."""
          self._file_name = request.get("file_name", "")
          self._file_url = request.get("file_url", "")
          self._file_bytes = request.get("file_bytes", "")
          self._file_type = request.get("file_type", "")
          self._return_type = request.get("return_type", 0)
          self._extract_type = request.get("extract_type", 0)
          # Per-request working directory; filled in by start().
          self._base_dir = ""

      def start(self):
          """Run the whole pipeline and return a list of ``fileText_pb2.Result``.

          When the file could not be stored, the list holds a single result
          carrying the download-error state. The working directory is removed
          on every exit path, including when processing raises.
          """
          file, self._base_dir = self.__save()
          try:
              if (not file) or (not os.path.exists(file)):
                  # "下载错误" = download error. There is no MD5 for a file
                  # that never arrived, so none is passed.
                  return [self.create_response(file_path=self._file_name, content_result="",
                                               state=error_number["下载错误"])]
              return self.__convert_start([file], [])
          finally:
              # __save() returns an empty directory name when it already
              # cleaned up after itself.
              if self._base_dir:
                  del_directory(self._base_dir)

      def __convert_start(self, files_path, result):
          """Process ``files_path`` and every file they expand into.

          Results are appended to ``result`` in place; the same list is returned.
          """
          pending = files_path
          while True:
              response_object, pending = self.compress_method(pending)
              result.extend(response_object)
              if not pending:
                  return result

      # Main dispatcher.
      # NOTE(review): limit_decor comes from util.check_timeout; the meaning of
      # its two arguments is not visible here - presumably a time limit, confirm.
      @limit_decor(700, 3)
      def compress_method(self, files_path):
          """Convert each path once and report what is left to process.

          :param files_path: iterable of local file paths.
          :return: ``(response_object, un_files)`` - the results built in this
              pass, and the paths produced by conversions that returned a
              list/tuple instead of text (files still waiting to be processed).
          """
          un_files = []  # files still waiting to be unpacked / converted
          response_object = []
          for file_path in files_path:
              try:
                  suffix = file_path.split(".")[-1].lower()
                  if suffix not in model_type:
                      # "类型不支持" = unsupported type.
                      response_object.append(
                          self.create_response(file_path=file_path, content_result="",
                                               state=error_number["类型不支持"]))
                      continue
                  file_md5 = cal_md5(file_path)
                  if self._return_type in (0, 2):
                      # Reuse the text URL recorded for an identical file.
                      exists_url = Redis.string_get(file_md5)
                      if exists_url:
                          text_content = FS.download_text_content(exists_url) if self._return_type == 2 else ""
                          response_object.append(fileText_pb2.Result(
                              fileName=os.path.basename(file_path),
                              textContent=text_content,
                              textUrl=exists_url,
                              # Hide the working directory from the caller.
                              filePath=file_path.replace(self._base_dir, ""),
                              errorState=error_number["成功"]))
                          continue
                  content_result, state = file_processing.convert_start(file_path, suffix)
                  if isinstance(content_result, (list, tuple)):
                      un_files.extend(content_result)
                  else:
                      response_object.append(
                          self.create_response(file_path=file_path, content_result=content_result,
                                               state=state, file_md5=file_md5))
              except Exception as e:
                  # Best effort: one broken attachment must not abort the rest.
                  # "解析错误" = parse error.
                  response_object.append(
                      self.create_response(file_path=file_path, content_result="",
                                           state=error_number["解析错误"], file_md5=""))
                  logger.warning(f"附件解析失败{file_path}-{e}")
          return response_object, un_files

      def __save(self):
          """Store the requested file in a fresh directory under ``./docs``.

          :return: ``(file_path, file_dir)`` on success, ``("", "")`` on any
              failure. On failure the directory created here is removed.
          """
          file_dir = ""
          file_path = ""
          try:
              base_dir = os.path.dirname("./docs/")
              file_dir = os.path.join(base_dir, str(uuid.uuid1()))
              generate_directory(file_dir)
              # Keep only the final path component so a client-supplied name
              # (absolute path, "../") cannot point outside the working directory.
              file_path = os.path.join(file_dir, os.path.basename(self._file_name))
              logger.info(f"save_type:{self._extract_type},file_name:{file_path},file_url:{self._file_url}")
              if self._extract_type == 0:
                  status, _ = download(self._file_url, file_path, 1)
              elif self._extract_type == 1:
                  status, _ = save_file(file=self._file_bytes, filename=file_path)
              elif self._extract_type == 2:
                  status, _ = download(self._file_url, file_path, 2)
              else:
                  status = "500"
          except Exception as e:
              logger.warning(f"save false--> {e}")
              status = "500"
          if status == "500":
              if file_dir:
                  del_directory(file_dir)
              return "", ""
          return file_path, file_dir

      def create_response(self, file_path: str, content_result: str, state: str, file_md5: str = ""):
          """Build the ``fileText_pb2.Result`` for one file.

          :param file_path: local path of the file; the working directory is
              stripped from it before it is reported.
          :param content_result: extracted text, or ``""`` when there is none.
          :param state: value stored in the result's ``errorState`` field.
          :param file_md5: MD5 of the file; when given and the text was
              uploaded, the MD5 -> URL pair is recorded in Redis. Optional,
              because error results have no usable MD5.
          :return: the populated ``fileText_pb2.Result``.
          """
          file_name = os.path.basename(file_path)
          file_path = file_path.replace(self._base_dir, "")  # hide the working directory
          url = ""
          if content_result and self._return_type in (0, 2):
              oss_url = str(uuid.uuid1())
              status, _ = FS.upload_text_file(oss_url, content_result)
              if status == 200:
                  url = oss_url
                  if self._return_type == 0:
                      # URL-only mode: drop the inline text, but only once the
                      # upload succeeded, so a failed upload still returns it.
                      content_result = ""
          if url and file_md5:
              if not Redis.string_set(file_md5, url):
                  logger.warning(f"文件去重失败,插入失败--->{url}")
          return fileText_pb2.Result(fileName=file_name, textContent=content_result, textUrl=url,
                                     filePath=file_path, errorState=state)