dongzhaorui 3 жил өмнө
parent
commit
263ab92f46

+ 4 - 1
find_source/crawler/utils.py

@@ -31,4 +31,7 @@ def extract_domain(url):
 
 def extract_page_title(html):
     element = html2element(html)
-    return "".join(element.xpath('/html/head/title/text()')).strip()
+    nodes = element.xpath('/html/head/title/text()')  # all text nodes under <head>/<title>
+    if len(nodes) > 1:
+        return str(nodes[-1]).strip()  # keep only the last node; "".format(x) is always ""
+    return "".join(nodes).strip()