|
@@ -13,8 +13,10 @@ def element2html(element: HtmlElement) -> str:
|
|
|
return tostring(element, encoding="utf-8").decode()
|
|
|
|
|
|
|
|
|
-def html2element(html: str) -> HtmlElement:
|
|
|
- return fromstring(html)
|
|
|
+def html2element(html_str: str) -> HtmlElement:
+    """Parse an HTML string into an element after removing markup noise.
+
+    Two kinds of markup are stripped before the text is handed to
+    ``fromstring``:
+
+    * ``<br>`` tags in any spelling (``<br>``, ``<br/>``, ``</br>``,
+      ``<BR clear="all">``), replaced with the empty string.
+    * ``<?xml ...>`` declarations / processing instructions.
+      NOTE(review): presumably removed because the parser rejects ``str``
+      input that carries an encoding declaration - confirm against lxml.
+
+    :param html_str: raw HTML source text.
+    :return: the element produced by ``fromstring`` for the cleaned text.
+    """
+    # ``\b`` limits the match to real <br> tags instead of every tag whose
+    # name merely starts with "br"; ``[^>]*`` (unlike ``.*?``) also spans
+    # newlines, and IGNORECASE covers upper-case ``<BR>``.
+    html_str = re.sub(r'</?br\b[^>]*>', '', html_str, flags=re.IGNORECASE)
+    # ``[^>]*`` lets a declaration that is split across lines be removed too.
+    html_str = re.sub(r'<\?xml[^>]*>', '', html_str)
+    return fromstring(html_str)
|
|
|
|
|
|
|
|
|
def valid_element(node: HtmlElement, feature: str):
|