123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137 |
- # coding:utf-8
- from util.file_operations import save_file
- from util.file_operations import generate_directory
- from util.file_operations import del_directory
- from util.attach_tools import download
- import file_processing
- import os
- import uuid
- from util.check_timeout import limit_decor
- from loguru import logger
- from proto import fileText_pb2
- from util.fs_client import FileServeClient
- from docs.config import error_number
- from file_processing import model_type
- from docs.config import oss_txt_config
- from docs.config import duplicatesRedisConfig
- from util.hash_file import cal_md5
- from util.redis_helper import RedisString
# Process-wide service clients, created once when the module is imported and
# shared by every FileApps instance.

# Client for the text file store configured by ``oss_txt_config``; used below
# to upload extracted text and to download previously stored text.
FS = FileServeClient(oss_txt_config)

# String store configured by ``duplicatesRedisConfig``; used below to map a
# file's MD5 to the URL of its already-extracted text (de-duplication).
Redis = RedisString(duplicatesRedisConfig)
class FileApps(object):
    """Extract text from one uploaded or downloadable file and build result messages.

    A request is processed by :meth:`start`: the file is stored in a fresh
    scratch directory, converted through ``file_processing.convert_start``, and
    every processed file yields one ``fileText_pb2.Result``.  The scratch
    directory is removed before :meth:`start` returns.
    """

    def __init__(self, request):
        """Copy the request fields this class works with.

        :param request: object exposing ``get(key, default)``; the keys read are
            ``file_name``, ``file_url``, ``file_bytes``, ``file_type``,
            ``return_type`` and ``extract_type``.
        """
        self._file_name = request.get("file_name", "")
        self._file_url = request.get("file_url", "")
        self._file_bytes = request.get("file_bytes", "")
        self._file_type = request.get("file_type", "")
        # 0: upload the text and answer with its URL only; 2: upload and also
        # return the text inline; any other value: return the text inline only.
        self._return_type = request.get("return_type", 0)
        # 0 / 2: fetch file_url with download() (third argument 1 / 2);
        # 1: write file_bytes with save_file(); anything else is rejected.
        self._extract_type = request.get("extract_type", 0)
        # Scratch directory of the current request, filled in by start().
        self._base_dir = ""

    def start(self):
        """Store the requested file, convert it and return the results.

        :return: list of ``fileText_pb2.Result``; a single "下载错误" result when
            the file could not be stored.
        """
        file, self._base_dir = self.__save()
        try:
            if (not file) or (not os.path.exists(file)):
                return [self.create_response(file_path=self._file_name, content_result="",
                                             state=error_number["下载错误"])]
            return self.__convert_start([file], [])
        finally:
            # Remove the scratch directory on every exit path, including the
            # early return above and exceptions raised during conversion.
            if self._base_dir:
                del_directory(self._base_dir)

    def __convert_start(self, files_path, result):
        """Run :meth:`compress_method` until no further files are produced.

        :param files_path: paths to process in the first pass.
        :param result: list that collects the results; extended in place.
        :return: ``result``.
        """
        pending = files_path
        # Iterative form of the original recursion: each pass may hand back
        # more files, which are processed in the next pass.
        while pending:
            response_object, pending = self.compress_method(pending)
            result.extend(response_object)
        return result

    # Main dispatcher.
    # NOTE(review): limit_decor(700, 3) presumably bounds the run time of one
    # pass -- confirm its timeout behaviour in util.check_timeout.
    @limit_decor(700, 3)
    def compress_method(self, files_path):
        """Convert each file in ``files_path`` once.

        :param files_path: iterable of file paths inside the scratch directory.
        :return: ``(response_object, un_files)`` -- the results built in this
            pass and the paths still waiting to be processed (whatever
            ``file_processing.convert_start`` returned as a list or tuple;
            presumably the members of an unpacked archive).
        """
        un_files = []  # files still to be unpacked / processed
        response_object = []
        for file_path in files_path:
            try:
                suffix = file_path.split(".")[-1].lower()
                if suffix not in model_type:
                    response_object.append(
                        self.create_response(file_path=file_path, content_result="",
                                             state=error_number["类型不支持"]))
                    continue

                # Hash only files that will actually be processed.
                file_md5 = cal_md5(file_path)

                if self._return_type in (0, 2):
                    # De-duplication: reuse the stored text of an identical file.
                    exists_url = Redis.string_get(file_md5)
                    if exists_url:
                        text_content = FS.download_text_content(exists_url) if self._return_type == 2 else ""
                        response_object.append(fileText_pb2.Result(
                            fileName=os.path.basename(file_path),
                            textContent=text_content,
                            textUrl=exists_url,
                            # Hide the scratch directory from the caller.
                            filePath=file_path.replace(self._base_dir, ""),
                            errorState=error_number["成功"]))
                        continue

                content_result, state = file_processing.convert_start(file_path, suffix)
                if isinstance(content_result, (list, tuple)):
                    un_files.extend(content_result)
                else:
                    response_object.append(
                        self.create_response(file_path=file_path, content_result=content_result,
                                             state=state, file_md5=file_md5))
            except Exception as e:
                # Best effort: one broken file must not abort the whole batch.
                response_object.append(
                    self.create_response(file_path=file_path, content_result="",
                                         state=error_number["解析错误"], file_md5=""))
                logger.warning(f"附件解析失败{file_path}-{e}")
        return response_object, un_files

    def __save(self):
        """Store the requested file in a new scratch directory.

        :return: ``(file_path, file_dir)`` on success, ``("", "")`` on failure.
            On failure the scratch directory, if it was created, is removed.
        """
        file_dir = ""
        file_path = ""
        status = "500"
        try:
            base_dir = os.path.dirname("./docs/")
            file_dir = os.path.join(base_dir, str(uuid.uuid1()))
            generate_directory(file_dir)
            # The name comes from the request: keep only its last component so
            # it cannot point outside the scratch directory.
            file_path = os.path.join(file_dir, os.path.basename(self._file_name))
            logger.info(f"save_type:{self._extract_type},file_name:{file_path},file_url:{self._file_url}")
            if self._extract_type == 0:
                status, _ = download(self._file_url, file_path, 1)
            elif self._extract_type == 1:
                status, _ = save_file(file=self._file_bytes, filename=file_path)
            elif self._extract_type == 2:
                status, _ = download(self._file_url, file_path, 2)
            # Any other extract_type keeps the initial failure status.
        except Exception as e:
            logger.warning(f"save false--> {e}")
            status = "500"

        if status != "500":
            return file_path, file_dir

        # Failure: do not leave the scratch directory behind.  The guard skips
        # the call when the directory was never created.
        if file_dir and os.path.exists(file_dir):
            del_directory(file_dir)
        return "", ""

    def create_response(self, file_path: str, content_result: str, state: str, file_md5: str = ""):
        """Build the ``fileText_pb2.Result`` for one processed file.

        When ``return_type`` is 0 or 2 and there is text, the text is uploaded
        through ``FS``.  For ``return_type`` 0 a successful upload replaces the
        inline text by its URL; if the upload fails the text stays inline.  A
        successful upload is recorded in Redis under ``file_md5``.

        :param file_path: path of the processed file; the scratch directory
            prefix is removed before it is reported.
        :param content_result: extracted text, ``""`` if none.
        :param state: value stored in the result's ``errorState`` field.
        :param file_md5: MD5 of the file, used as de-duplication key; when empty
            no de-duplication entry is written.
        :return: the populated ``fileText_pb2.Result``.
        """
        file_name = os.path.basename(file_path)  # bare file name
        file_path = file_path.replace(self._base_dir, "")  # hide the scratch directory
        url = ""
        if self._return_type in (0, 2) and content_result:
            oss_url = str(uuid.uuid1())
            status, _request_id = FS.upload_text_file(oss_url, content_result)
            if status == 200:
                url = oss_url
                if self._return_type == 0:
                    content_result = ""
        # Never register an entry under an empty key.
        if url and file_md5:
            if not Redis.string_set(file_md5, url):
                logger.warning(f"文件去重失败,插入失败--->{url}")
        return fileText_pb2.Result(fileName=file_name, textContent=content_result, textUrl=url,
                                   filePath=file_path, errorState=state)
|