Эх сурвалжийг харах

添加DOCTYPE头部兼容性处理

dongzhaorui 3 жил өмнө
parent
commit
eaedec859f

+ 1 - 3
find_source/common/tools.py

@@ -2,12 +2,9 @@ import datetime
 import hashlib
 import re
 import time
-from collections import namedtuple
 
 from lxml.html import HtmlElement, fromstring, tostring
 
-SearchText = namedtuple('SearchText', ['total'])
-
 
 def element2html(element: HtmlElement) -> str:
     return tostring(element, encoding="utf-8").decode()
@@ -16,6 +13,7 @@ def element2html(element: HtmlElement) -> str:
 def html2element(html_str: str) -> HtmlElement:
     html_str = re.sub('</?br.*?>', '', html_str)
     html_str = re.sub(r'<\?xml.*?>', '', html_str)
+    html_str = re.sub(r'<DOCTYPE.*?>', '', html_str)
     return fromstring(html_str)