1234567891011121314151617181920212223 |
- """
- .doc解析
- """
- import subprocess, os
- from loguru import logger
- from file_processing.file_docx import read_docx
- from docs.config import error_number
def read_doc(file_path):
    """Convert a legacy ``.doc`` file to ``.docx`` and parse the result.

    The conversion is delegated to LibreOffice (``soffice --headless``); the
    converted file is written next to the source file and then handed to
    ``read_docx``.

    Args:
        file_path: Path to the ``.doc`` file to parse.

    Returns:
        Whatever ``read_docx`` returns for the converted file. If the
        conversion command fails (non-zero exit status, or ``soffice`` cannot
        be started), returns ``('', error_number["解析错误"])``.
    """
    out_dir = os.path.dirname(file_path)
    # Pass the command as an argument list and without a shell, so paths that
    # contain spaces or shell metacharacters are handed to soffice verbatim
    # instead of being re-parsed (or executed) by /bin/sh.
    command = [
        'soffice', '--headless', '--convert-to', 'docx',
        file_path,
        # A bare file name has an empty dirname; --outdir needs a real value.
        '--outdir', out_dir or os.curdir,
    ]
    try:
        output = subprocess.check_output(command)
        logger.debug(str(output))
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing/unexecutable soffice binary, which the
        # shell used to report as a non-zero exit status.
        logger.error('doc文件转换出错')
        logger.error(e)
        return '', error_number["解析错误"]
    # Strip only the final extension: "report.v2.doc" must map to
    # "report.v2.docx", not "report.docx" (soffice is expected to replace
    # just the last suffix).
    file_name = os.path.splitext(os.path.basename(file_path))[0] + '.docx'
    docx_path = os.path.join(out_dir, file_name)
    logger.debug('doc文件转docx后文件路径>>>' + docx_path)
    return read_docx(docx_path)
|