dongzhaorui 3 years ago
parent
commit
263ab92f46
1 changed files with 4 additions and 1 deletions
  1. 4 1
      find_source/crawler/utils.py

+ 4 - 1
find_source/crawler/utils.py

@@ -31,4 +31,7 @@ def extract_domain(url):
 
 def extract_page_title(html):
     element = html2element(html)
-    return "".join(element.xpath('/html/head/title/text()')).strip()
+    nodes = element.xpath('/html/head/title/text()')
+    if len(nodes) > 1:  # several title text nodes matched: keep only the last one
+        return str(nodes[-1]).strip()  # was "".format(...): no placeholder, so it always returned ""
+    return "".join(nodes).strip()