|
@@ -11,9 +11,10 @@ def element2html(element: HtmlElement) -> str:
|
|
|
|
|
|
|
|
|
def html2element(html_str: str) -> HtmlElement:
|
|
|
+ html_str = re.sub('\ufeff|\xa0|\u3000', '', html_str)
|
|
|
html_str = re.sub('</?br.*?>', '', html_str)
|
|
|
html_str = re.sub(r'<\?xml.*?>', '', html_str)
|
|
|
- html_str = re.sub(r'<DOCTYPE.*?>', '', html_str)
|
|
|
+ html_str = re.sub(r'<[!]DOCTYPE.*?>', '', html_str)
|
|
|
return fromstring(html_str)
|
|
|
|
|
|
|