from urllib3 import get_host

from common.log import logger
from common.tools import html2element


def err_details(worker):
    """Log the exception carried by *worker*, if any, and return *worker*.

    :param worker: object exposing an ``exception()`` method
        (presumably a ``concurrent.futures.Future`` -- confirm with callers).
    :return: the same *worker* object, unchanged, so the call can be chained.
    """
    worker_exception = worker.exception()
    if worker_exception:
        # NOTE(review): ``logger.exception`` is called outside an ``except``
        # block here, so whether a useful traceback is attached depends on
        # the project logger -- confirm against common.log.
        logger.exception("Worker return exception: {}".format(worker_exception))
    return worker


def extract_base_url(url: str) -> str:
    """Return the ``scheme://host[:port]/`` prefix of *url*.

    The port is included only when ``get_host`` reports one, e.g.
    ``'http://192.168.3.207:8080/'`` -> ``'http://192.168.3.207:8080/'``.

    :param url: URL to split with ``urllib3.get_host``.
    :return: base URL with a trailing slash.
    """
    scheme, host, port = get_host(url)
    if port is None:
        return f"{scheme}://{host}/"
    return f"{scheme}://{host}:{port}/"


def extract_domain(url: str) -> str:
    """Return the ``host[:port]`` part of *url*.

    The port is appended only when ``get_host`` reports one, e.g.
    ``'http://192.168.3.207:8080/'`` -> ``'192.168.3.207:8080'``.

    :param url: URL to split with ``urllib3.get_host``.
    :return: host, optionally followed by ``:port``.
    """
    _, host, port = get_host(url)
    if port is None:
        return f"{host}"
    return f"{host}:{port}"


def extract_page_title(html) -> str:
    """Return the stripped text of the page's ``<title>`` element.

    The document is parsed with ``html2element`` and queried with the XPath
    ``/html/head/title/text()``. When the query yields several text nodes the
    last one is used; when it yields none the result is an empty string.

    :param html: markup accepted by ``html2element``.
    :return: title text with surrounding whitespace removed.
    """
    nodes = html2element(html).xpath('/html/head/title/text()')
    if len(nodes) > 1:
        # The previous ``"".format(nodes[-1])`` formatted into an empty
        # template and therefore always produced "" -- use the node itself.
        return str(nodes[-1]).strip()
    return "".join(nodes).strip()