# attach_preprocess.py (1.6 KB)
  1. # coding:utf-8
  2. from util.htmltag import CleanKeepTable
  3. from typing import AnyStr
  4. from loguru import logger
  class AttachHelper(object):
      """Pre-process attachment HTML: normalize table markup, then clean tags.

      The pipeline is ``preprocess`` -> ``__add_space`` -> ``__clear_tag``.
      """

      @staticmethod
      def __clear_tag(content: AnyStr) -> AnyStr:
          """Clean the tags of ``content`` by running ``CleanKeepTable`` on it.

          :param content: HTML text to clean.
          :return: whatever ``CleanKeepTable(content)`` returns; if that call
              raises, the exception is logged as a warning and ``content`` is
              returned unchanged.
          """
          try:
              # Original author's note: this call keeps the table tags.
              # NOTE(review): CleanKeepTable is defined in util.htmltag and is
              # not visible here -- confirm exactly which tags it preserves.
              return CleanKeepTable(content)
          except Exception as e:
              # Deliberate best effort: a cleaning failure must not lose the
              # content, so fall back to the input as-is.
              logger.warning(e)
              return content

      @staticmethod
      def __add_space(content_html):
          """Add a space after the opening table tag names in ``content_html``.

          Rewrites ``<table``, ``<tbody``, ``<th``, ``<tr`` and ``<td`` to the
          same text followed by one space (e.g. ``<td>`` -> ``<td >``).
          NOTE(review): presumably later processing matches on ``"<td "``-style
          prefixes -- confirm against ``CleanKeepTable``.

          Along the way it:

          * removes ``<thead ...>`` / ``</thead>`` wrapper tags (opening and
            closing, with or without attributes) so no orphan tag is left;
          * removes ``<br/>`` tags completely, without leaving a stray ``>``;
          * only touches whole tag names, so ``<thead>`` or ``<track>`` are
            never split into ``<th ead>`` / ``<tr ack>``.

          :param content_html: HTML text (``str``). Falsy values (``None``,
              ``""``) and text without ``<table`` are returned unchanged.
          :return: the normalized HTML text.
          """
          # Local import: the file's import header does not provide ``re``.
          import re

          if not content_html or "<table" not in content_html:
              return content_html

          # Drop the <thead> wrapper entirely; its rows stay in place.
          content_html = re.sub(r"</?thead\b[^>]*>", "", content_html)
          # Remove the whole self-closing break tag, including its '>'.
          content_html = re.sub(r"<br/\s*>", "", content_html)

          # Heuristic kept from the original: when table, row and cell tags
          # already appear followed by a space, the markup is left as it is.
          already_spaced = (
              "<table " in content_html
              and "<td " in content_html
              and "<tr " in content_html
          )
          if already_spaced:
              return content_html

          # ``\b`` restricts the match to complete tag names. A tag that
          # already has attributes ends up with two spaces, as before.
          return re.sub(r"<(tbody|table|th|tr|td)\b", r"<\1 ", content_html)

      def preprocess(self, content: AnyStr) -> AnyStr:
          """Entry point: normalize table markup, then clean the tags.

          NOTE(review): the signature is annotated ``AnyStr``, but the table
          normalization step tests ``str`` literals against the content, so
          non-empty ``bytes`` input raises ``TypeError`` -- confirm callers
          only pass ``str``.

          :param content: raw attachment HTML.
          :return: the content after ``__add_space`` and ``__clear_tag``.
          """
          content = self.__add_space(content)
          content = self.__clear_tag(content)
          return content