1234567891011121314151617181920212223242526272829303132333435363738394041424344454647 |
- # coding:utf-8
- from file_processing.file_docx import read_docx
- from file_processing.file_doc import read_doc
- from file_processing.file_xlsx import read_xlsx
- from file_processing.file_swf import read_swf
- from file_processing.file_compress import CompressFiles
- from file_processing.file_pdf import PdfRead
- from file_processing.file_picture import execute_ocr
- from loguru import logger
- from docs.config import error_number
- from file_processing.file_txt import read_txt
- from file_processing.file_picture import ocr, ocr_patch
# Shared handler instances, created once at import time and reused by the
# dispatch table below.
compress_file = CompressFiles()
pdf_read = PdfRead()

# Dispatch table: file-type string (lower-case, no leading dot) -> handler.
# convert_start() calls the handler with the file path and unpacks its result
# into a (text, state) pair, so every handler registered here is expected to
# return a 2-tuple.
model_type = {
    "pdf": pdf_read.read_pdf,
    "doc": read_doc,
    "docx": read_docx,
    "xlsx": read_xlsx,
    "xls": read_xlsx,
    "txt": read_txt,
    "swf": read_swf,
    "rar": compress_file.extract_file,
    "zip": compress_file.extract_file,
    "jpg": ocr,
    "png": ocr,
    "jpeg": ocr,
}

# __all__ must contain the *names* of the exported objects as strings;
# listing the objects themselves makes `from <module> import *` raise
# TypeError.
__all__ = ["compress_file", "model_type"]
def convert_start(file_path, file_type) -> tuple:
    """Run the handler registered for ``file_type`` on ``file_path``.

    :param file_path: Path of the file to process; passed unchanged to the
        handler.
    :param file_type: Key into ``model_type`` selecting the handler (the
        table's keys are lower-case type names such as ``"pdf"``).
    :return: The ``(text, state)`` pair produced by the handler. If the type
        has no registered handler, or the handler raises, returns
        ``("", error_number["解析错误"])`` instead of propagating the error.
    """
    handler = model_type.get(file_type)
    if handler is None:
        # Previously this case surfaced as a KeyError swallowed by the broad
        # except below, which logged only the quoted key. Report it
        # explicitly; the return value is unchanged.
        logger.warning(f"文件解析失败--》unsupported file type: {file_type!r}")
        return "", error_number["解析错误"]

    try:
        # Unpacking stays inside the try so a handler that returns something
        # other than a 2-tuple is reported as a parse failure as well.
        text, state = handler(file_path)
    except Exception as e:
        # Deliberate best-effort boundary: any handler failure is logged and
        # mapped to the parse-error code rather than raised to the caller.
        logger.warning(f"文件解析失败--》{e}")
        return "", error_number["解析错误"]
    return text, state
|