Browse Source

Add deletion of inline styles

dongzhaorui 3 years ago
parent
commit
5335f89da3
1 changed file with 4 additions and 9 deletions
  1. 4 9
      find_source/crawler/utils.py

+ 4 - 9
find_source/crawler/utils.py

@@ -204,6 +204,10 @@ def normalize_node(element: HtmlElement):
         if style:
             del node.attrib['style']
 
+        # Obsolete scroll property
+        if node.tag.lower() == 'marquee':
+            remove_node(node)
+
     # 删除包含干扰属性的节点(完全匹配)
     for node, _ in iter_node(element):
         attr = (node.get('id') or node.get('class'))
@@ -212,15 +216,6 @@ def normalize_node(element: HtmlElement):
                 remove_node(node)
                 break
 
-    # # 删除无效节点(模糊匹配)
-    # for node, _ in iter_node(element):
-    #     attrib = (node.get('id') or node.get('class'))
-    #     if attrib:
-    #         for attr in USELESS_ATTR:
-    #             if re.match(attr, attrib.lower()) is not None:
-    #                 remove_node(node)
-    #                 break
-
 
 def pre_parse(element):
     normalize_node(element)