dongzhaorui 3 жил өмнө
parent
commit
6f8a989ce0

+ 9 - 0
find_source/crawler/utils.py

@@ -20,6 +20,15 @@ def extract_base_url(url):
     return f"{_s}://{_h}/" if _p is None else f"{_s}://{_h}:{_p}/"
 
 
+def extract_host(url):
+    """
+
+    # >>> base_url = extract_host('http://192.168.3.207:8080/')
+    """
+    _, host, port = get_host(url)
+    return f"{host}" if port is None else f"{host}:{port}"
+
+
 def extract_page_title(html):
     """
     Return the page title found in ``html``, stripped of outer whitespace.
 
     Everything matched by the XPath ``/html/head/title/text()`` is joined
     into one string before stripping.
     NOTE(review): presumably ``html2element`` returns an lxml-style element
     whose ``xpath`` yields a list of strings -- confirm against its definition.
     """
     root = html2element(html)
     title_parts = root.xpath('/html/head/title/text()')
     joined_title = "".join(title_parts)
     return joined_title.strip()