1234567891011121314151617181920212223242526 |
- from urllib3 import get_host
- from common.log import logger
- from common.tools import html2element
def err_details(worker):
    """Log the exception carried by ``worker``, if any, and hand it back.

    ``worker`` must expose an ``exception()`` method (presumably a
    ``concurrent.futures.Future``-like object -- confirm against callers).
    When that method returns a falsy value nothing is logged.

    Returns:
        The same ``worker`` object that was passed in, unchanged.
    """
    failure = worker.exception()
    if not failure:
        # Nothing went wrong in the worker; pass it straight through.
        return worker

    message = "Worker return exception: {}".format(failure)
    logger.exception(message)
    return worker
def extract_base_url(url):
    """Build the root URL (scheme, host and optional port) for ``url``.

    The scheme, host and port are taken from the 3-tuple returned by
    ``get_host(url)``. The port is included only when it is not ``None``,
    and the result always ends with a trailing slash.

    # >>> base_url = extract_base_url('http://192.168.3.207:8080/')
    """
    scheme, host, port = get_host(url)
    if port is not None:
        return f"{scheme}://{host}:{port}/"
    return f"{scheme}://{host}/"
def extract_page_title(html):
    """Return the text found at ``/html/head/title`` in ``html``.

    ``html`` is converted with the project helper ``html2element``
    (presumably yielding an lxml-style element -- confirm). Every text
    node matched by the XPath is concatenated, then leading and trailing
    whitespace is stripped. With no match the result is an empty string.
    """
    root = html2element(html)
    title_parts = root.xpath('/html/head/title/text()')
    return "".join(title_parts).strip()
|