# coding:utf-8
"""Route a file to the reader registered for its type and return the result."""
from file_processing.file_docx import read_docx
from file_processing.file_doc import read_doc
from file_processing.file_xlsx import read_xlsx
from file_processing.file_swf import read_swf
from file_processing.file_compress import CompressFiles
from file_processing.file_pdf import PdfRead
from file_processing.file_picture import execute_ocr
from loguru import logger
from docs.config import error_number
from file_processing.file_txt import read_txt
from file_processing.file_picture import ocr, ocr_patch

# Shared reader instances, created once at import time and reused by the
# dispatch table below.
compress_file = CompressFiles()
pdf_read = PdfRead()

# Dispatch table: file-type key -> callable invoked with the file path.
# `convert_start` unpacks each handler's result as a `(text, state)` pair.
model_type = {
    "pdf": pdf_read.read_pdf,
    "doc": read_doc,
    "docx": read_docx,
    "xlsx": read_xlsx,
    "xls": read_xlsx,
    "txt": read_txt,
    "swf": read_swf,
    "rar": compress_file.extract_file,
    "zip": compress_file.extract_file,
    "jpg": ocr,
    "png": ocr,
    "jpeg": ocr,
}

# `__all__` must hold the *names* of the exported objects as strings; listing
# the objects themselves makes `from <module> import *` raise TypeError.
__all__ = ["compress_file", "model_type"]


def convert_start(file_path, file_type) -> tuple:
    """
    Start converting a file by calling the handler registered for its type.

    The lookup in ``model_type`` is an exact key match, so ``file_type`` must
    be spelled exactly like one of its keys (e.g. ``"pdf"``, ``"docx"``).

    :param file_path: path of the file, passed unchanged to the handler
    :param file_type: key into ``model_type`` selecting the handler
    :return: the ``(text, state)`` pair produced by the handler; if the type
        is not registered or the handler raises, a warning is logged and
        ``("", error_number["解析错误"])`` is returned instead
    """
    try:
        # The lookup stays inside the try on purpose: an unregistered type
        # (KeyError) is reported through the same error path as a handler
        # failure, so callers never see an exception from this function.
        text, state = model_type[file_type](file_path)
    except Exception as e:
        logger.warning(f"文件解析失败--》{e}")
        return "", error_number["解析错误"]
    return text, state