utils.py 638 B

1234567891011121314151617181920212223242526
  1. from urllib3 import get_host
  2. from common.log import logger
  3. from common.tools import html2element
  4. def err_details(worker):
  5. worker_exception = worker.exception()
  6. if worker_exception:
  7. logger.exception("Worker return exception: {}".format(worker_exception))
  8. return worker
  9. def extract_base_url(url):
  10. """
  11. # >>> base_url = extract_base_url('http://192.168.3.207:8080/')
  12. """
  13. _s, _h, _p = get_host(url)
  14. return f"{_s}://{_h}/" if _p is None else f"{_s}://{_h}:{_p}/"
  15. def extract_page_title(html):
  16. element = html2element(html)
  17. return "".join(element.xpath('/html/head/title/text()')).strip()