|
@@ -204,6 +204,10 @@ def normalize_node(element: HtmlElement):
|
|
|
if style:
|
|
|
del node.attrib['style']
|
|
|
|
|
|
+ # Remove the obsolete <marquee> scrolling element
|
|
|
+ if node.tag.lower() == 'marquee':
|
|
|
+ remove_node(node)
|
|
|
+
|
|
|
# Remove nodes that carry a noise attribute (exact match)
|
|
|
for node, _ in iter_node(element):
|
|
|
attr = (node.get('id') or node.get('class'))
|
|
@@ -212,15 +216,6 @@ def normalize_node(element: HtmlElement):
|
|
|
remove_node(node)
|
|
|
break
|
|
|
|
|
|
- # # 删除无效节点(模糊匹配)
|
|
|
- # for node, _ in iter_node(element):
|
|
|
- # attrib = (node.get('id') or node.get('class'))
|
|
|
- # if attrib:
|
|
|
- # for attr in USELESS_ATTR:
|
|
|
- # if re.match(attr, attrib.lower()) is not None:
|
|
|
- # remove_node(node)
|
|
|
- # break
|
|
|
-
|
|
|
|
|
|
def pre_parse(element):
|
|
|
normalize_node(element)
|