|
@@ -2,12 +2,9 @@ import datetime
|
|
import hashlib
|
|
import hashlib
|
|
import re
|
|
import re
|
|
import time
|
|
import time
|
|
-from collections import namedtuple
|
|
|
|
|
|
|
|
from lxml.html import HtmlElement, fromstring, tostring
|
|
from lxml.html import HtmlElement, fromstring, tostring
|
|
|
|
|
|
-SearchText = namedtuple('SearchText', ['total'])
|
|
|
|
-
|
|
|
|
|
|
|
|
def element2html(element: HtmlElement) -> str:
    """Serialize an lxml HTML element to a markup string.

    The element is first serialized to UTF-8 encoded bytes and then decoded
    back into a ``str``.
    """
    serialized = tostring(element, encoding="utf-8")
    return serialized.decode()
|
|
@@ -16,6 +13,7 @@ def element2html(element: HtmlElement) -> str:
|
|
def html2element(html_str: str) -> HtmlElement:
    """Parse an HTML string into an lxml element after stripping noise markup.

    Before parsing, the following are removed from ``html_str``:

    * ``<br>`` tags in any form (``<br>``, ``<br/>``, ``</br>``, with or
      without attributes),
    * XML declarations (``<?xml ... ?>``),
    * document type declarations (``<!DOCTYPE ...>``), matched
      case-insensitively and across line breaks.

    Args:
        html_str: Raw HTML markup.

    Returns:
        The element produced by ``lxml.html.fromstring`` for the cleaned
        markup.
    """
    html_str = re.sub(r'</?br.*?>', '', html_str)
    # NOTE(review): presumably stripped because lxml rejects ``str`` input
    # that carries an encoding declaration -- confirm against lxml docs.
    html_str = re.sub(r'<\?xml.*?>', '', html_str)
    # A real doctype is spelled ``<!DOCTYPE ...>`` (any letter case) and may
    # wrap over several lines. The ``!`` is optional so that the legacy
    # ``<DOCTYPE ...>`` spelling that was matched before is still removed.
    html_str = re.sub(r'<!?DOCTYPE[^>]*>', '', html_str, flags=re.IGNORECASE)
    return fromstring(html_str)
|
|
|
|
|
|
|
|
|