""" .doc解析 """ import subprocess, os from loguru import logger from file_processing.file_docx import read_docx from docs.config import error_number def read_doc(file_path): out_dir = os.path.dirname(file_path) try: args = 'soffice --headless --convert-to docx %s --outdir %s' % (file_path, out_dir) output = subprocess.check_output(args, shell=True) logger.debug(str(output)) except subprocess.CalledProcessError as e: logger.error('doc文件转换出错') logger.error(e) return '', error_number["解析错误"] file_name = os.path.basename(file_path).split('.')[0] + '.docx' logger.debug('doc文件转docx后文件路径>>>' + os.path.join(out_dir, file_name)) docx_path = os.path.join(out_dir, file_name) return read_docx(docx_path)