|
@@ -0,0 +1,26 @@
|
|
|
+from urllib3 import get_host
|
|
|
+
|
|
|
+from common.log import logger
|
|
|
+from common.tools import html2element
|
|
|
+
|
|
|
+
|
|
|
def err_details(worker):
    """Log the exception reported by *worker*, if any, then return *worker*.

    Args:
        worker: Object exposing an ``exception()`` method (presumably a
            ``concurrent.futures.Future`` -- confirm against callers).

    Returns:
        The ``worker`` object that was passed in, unchanged.
    """
    failure = worker.exception()
    if not failure:
        # Nothing went wrong: hand the worker straight back.
        return worker
    logger.exception("Worker return exception: {}".format(failure))
    return worker
|
|
|
+
|
|
|
+
|
|
|
def extract_base_url(url):
    """Reduce *url* to ``scheme://host/`` or ``scheme://host:port/``.

    The scheme, host and port are taken from ``get_host(url)``. The port
    segment is included only when ``get_host`` reports a port that is not
    ``None``.

    Example (kept commented out, so it is not collected as a doctest):
        # >>> base_url = extract_base_url('http://192.168.3.207:8080/')

    Args:
        url: URL string to reduce.

    Returns:
        The base URL string, always ending in ``/``.
    """
    scheme, host, port = get_host(url)
    if port is not None:
        return f"{scheme}://{host}:{port}/"
    return f"{scheme}://{host}/"
|
|
|
+
|
|
|
+
|
|
|
def extract_page_title(html):
    """Return the page title found in *html*, with outer whitespace stripped.

    The markup is converted with ``html2element`` and queried with the
    XPath ``/html/head/title/text()``. All matches are concatenated, so
    the result is an empty string when the query matches nothing.

    Args:
        html: Markup passed through to ``html2element``.

    Returns:
        The concatenated, stripped title text.
    """
    root = html2element(html)
    title_parts = root.xpath('/html/head/title/text()')
    return "".join(title_parts).strip()
|
|
|
+
|