# coding:utf-8
# ai / nsq_convert2txt
from file_processing.file_docx import read_docx
from file_processing.file_doc import read_doc
from file_processing.file_xlsx import read_xlsx
from file_processing.file_swf import read_swf
from file_processing.file_compress import CompressFiles
from file_processing.file_pdf import PdfRead
from file_processing.file_picture import execute_ocr
from loguru import logger
from docs.config import error_number
from file_processing.file_txt import read_txt
from file_processing.file_picture import ocr, ocr_patch

# Shared handler instances; their bound methods are registered in the
# dispatch table below.
compress_file = CompressFiles()
pdf_read = PdfRead()

# Dispatch table: file-type key -> callable that handles that type.
# convert_start() calls the selected entry with the file path and unpacks
# the result as a (text, state) pair.
model_type = {
    "pdf": pdf_read.read_pdf,
    "doc": read_doc,
    "docx": read_docx,
    "xlsx": read_xlsx,
    "xls": read_xlsx,  # legacy Excel files are routed to the same reader as xlsx
    "txt": read_txt,
    "swf": read_swf,
    "rar": compress_file.extract_file,
    "zip": compress_file.extract_file,
    "jpg": ocr,
    "png": ocr,
    "jpeg": ocr,
}

# __all__ must contain the *names* of the exported objects as strings;
# listing the objects themselves makes `from <module> import *` raise
# TypeError.
__all__ = ["compress_file", "model_type"]


def convert_start(file_path, file_type) -> tuple:
    """
    Convert a file to text by dispatching on its type.

    The handler registered in ``model_type`` for ``file_type`` is called with
    ``file_path`` and its result is unpacked as a ``(text, state)`` pair.

    :param file_path: passed unchanged to the selected handler.
    :param file_type: key into ``model_type`` (e.g. ``"pdf"``); string values
        are matched case-insensitively.
    :return: the handler's ``(text, state)`` pair, or
        ``("", error_number["解析错误"])`` when no handler is registered for
        the type or the handler fails.
    """
    # Every registered key is lower-case, so fold the case of string input
    # ("PDF" -> "pdf"); non-string values go to the lookup untouched.
    type_key = file_type.lower() if isinstance(file_type, str) else file_type

    try:
        handler = model_type[type_key]
    except (KeyError, TypeError):
        # KeyError: unknown type; TypeError: unhashable key. Report the real
        # cause instead of a bare KeyError repr. The message prefix matches
        # the parse-failure log below so existing log filters still catch it.
        logger.warning(f"文件解析失败--》unsupported file type: {file_type!r}")
        return "", error_number["解析错误"]

    try:
        # The unpacking stays inside the try: a handler that does not return
        # a 2-item result is reported as a parse failure as well.
        text, state = handler(file_path)
    except Exception as e:
        # Deliberate catch-all boundary: any handler failure becomes an
        # error state for the caller rather than an exception.
        logger.warning(f"文件解析失败--》{e}")
        return "", error_number["解析错误"]
    return text, state