1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253 |
- # coding:utf-8
- from util.htmltag import CleanKeepTable
- from typing import AnyStr
- from loguru import logger
class AttachHelper(object):
    """Pre-process attachment HTML before it is handed to ``CleanKeepTable``.

    The only public entry point is :meth:`preprocess`; the two private
    static helpers do the table-tag normalisation and the tag clean-up.
    """

    @staticmethod
    def __clear_tag(content: AnyStr) -> AnyStr:
        """Clean the markup of ``content`` with ``CleanKeepTable``.

        :param content: HTML text to clean.
        :return: the value returned by ``CleanKeepTable(content)``; if that
            call raises, the exception is logged as a warning and the
            original ``content`` is returned unchanged.
        """
        try:
            # Per the original author's note, CleanKeepTable keeps <table> tags.
            tag_html = CleanKeepTable(content)
        except Exception as e:
            # Deliberate best-effort: a failed clean-up must not lose the text.
            logger.warning(e)
            return content
        else:
            return tag_html

    @staticmethod
    def __add_space(content_html):
        """Normalise table markup so that table tag names are followed by a space.

        Content without ``<table`` is returned untouched. Otherwise bare
        ``<thead>`` opening tags and ``<br/>`` line breaks are removed, and,
        unless ``<table ``, ``<td `` and ``<tr `` are all already present, a
        space is inserted after every ``<tbody``, ``<table``, ``<th``,
        ``<tr`` and ``<td``.

        :param content_html: HTML text (``str``).
        :return: the normalised HTML text.
        """
        if "<table" not in content_html:
            return content_html

        # Only the bare opening tag is dropped; "<thead ...>" with attributes
        # and the closing "</thead>" are left as they are.
        content_html = content_html.replace("<thead>", "")

        # Remove the whole "<br/>" tag. The previous code stripped only the
        # "<br/" prefix and left a stray ">" in the text. A dangling "<br/"
        # without ">" is still removed, as before.
        content_html = content_html.replace("<br/>", "").replace("<br/", "")

        # All three tags already carry a trailing space: nothing to add.
        if all(marker in content_html for marker in ("<table ", "<td ", "<tr ")):
            return content_html

        # None of these prefixes is a prefix of another, so order is irrelevant.
        for tag in ("<tbody", "<table", "<tr", "<td"):
            content_html = content_html.replace(tag, tag + " ")

        # "<th" is also a prefix of "<thead": split on "<thead" first so that
        # "<thead ...>" is not corrupted into "<th ead ...>".
        content_html = "<thead".join(
            piece.replace("<th", "<th ") for piece in content_html.split("<thead")
        )
        return content_html

    def preprocess(self, content: AnyStr) -> AnyStr:
        """Run the full pre-processing pipeline on ``content``.

        The table markup is normalised first, then the result is passed
        through the tag clean-up.

        :param content: HTML text to process.
        :return: the processed text.
        """
        content = self.__add_space(content)
        content = self.__clear_tag(content)
        return content
|