__init__.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. # coding:utf-8
  2. from file_processing.file_docx import read_docx
  3. from file_processing.file_doc import read_doc
  4. from file_processing.file_xlsx import read_xlsx
  5. from file_processing.file_swf import read_swf
  6. from file_processing.file_compress import CompressFiles
  7. from file_processing.file_pdf import PdfRead
  8. from file_processing.file_picture import execute_ocr
  9. from loguru import logger
  10. from docs.config import error_number
  11. from file_processing.file_txt import read_txt
  12. from file_processing.file_picture import ocr, ocr_patch
  13. compress_file = CompressFiles()
  14. pdf_read = PdfRead()
  15. model_type = {
  16. "pdf": pdf_read.read_pdf,
  17. "doc": read_doc,
  18. "docx": read_docx,
  19. "xlsx": read_xlsx,
  20. "xls": read_xlsx,
  21. "txt": read_txt,
  22. "swf": read_swf,
  23. "rar": compress_file.extract_file,
  24. "zip": compress_file.extract_file,
  25. "jpg": ocr,
  26. "png": ocr,
  27. "jpeg": ocr
  28. }
  29. __all__ = [compress_file, model_type]
  30. def convert_start(file_path, file_type) -> any:
  31. """
  32. 文件转换开始
  33. :param file_path:
  34. :param file_type:
  35. :return:
  36. """
  37. try:
  38. text, state = model_type[file_type](file_path)
  39. except Exception as e:
  40. logger.warning(f"文件解析失败--》{e}")
  41. return "", error_number["解析错误"]
  42. return text, state