12345678910111213141516171819202122232425262728293031323334353637383940414243 |
- # coding:utf-8
class TableStruct:
    """Hold the bounding box and an HTML rendering of one extracted table."""

    def __init__(self):
        """Initialise the table attributes to their empty defaults."""
        # Bounding-box corners; filled in by parse() from ``table.bbox``.
        self.min_x = None
        self.max_x = None
        self.min_y = None
        self.max_y = None
        # HTML ``<table>`` markup for the cells; stays "" when there are none.
        self.contents = ""

    def parse(self, table):
        """Populate this object from ``table``.

        :param table: object exposing ``extract()`` (returning rows of cells)
            and a four-item ``bbox`` that is unpacked in the order
            ``(min_x, min_y, max_x, max_y)``.
            NOTE(review): presumably a pdfplumber ``Table`` -- confirm.
        :return: None; the results are stored on the instance.
        """
        contents = table.extract()
        if contents:
            self.contents = self.__table_format(contents)
        # The bounding box is recorded even when the table has no cells.
        self.min_x, self.min_y, self.max_x, self.max_y = table.bbox

    @staticmethod
    def __table_format(data: list) -> str:
        """Render rows of cells as an HTML ``<table>`` string.

        :param data: iterable of rows, each an iterable of cell values;
            ``None`` cells are rendered as empty ``<td></td>`` elements.
        :return: the HTML markup, or "" when ``data`` is empty.
        """
        # Local import keeps the class self-contained; the file has no
        # import block of its own.
        import html

        if not data:
            return ""

        parts = ['<table border=1>']
        for row in data:
            parts.append('<tr>')
            for col in row:
                # Escape the cell text so '<', '>' and '&' in the extracted
                # content cannot break or inject markup. str() first so any
                # cell type (including tuples) formats safely.
                text = '' if col is None else html.escape(str(col), quote=False)
                parts.append('<td>%s</td>' % text)
            parts.append('</tr>')
        parts.append("</table>")
        # Join once instead of growing a string inside the nested loops.
        return "".join(parts)
|