download.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. import threading
  2. import requests
  3. import urllib3
  4. from loguru import logger
  5. from requests.adapters import HTTPAdapter
  6. from requests.models import Response, REDIRECT_STATI
  7. from urllib3.util.retry import Retry
  8. from config.load import headers
# Silence urllib3 warnings process-wide. Downloader may retry a request with
# verify=False after an SSL error; presumably this call is here to hide the
# InsecureRequestWarning such requests would otherwise emit -- confirm.
urllib3.disable_warnings()
  10. '''特殊编码需要解码'''
  11. SPECIAL_ENCODINGS = [
  12. 'Windows-1254'
  13. ]
  14. class Downloader:
  15. def __init__(self, connect=5, backoff_factor=0.1):
  16. self._connect = connect
  17. self._backoff_factor = backoff_factor
  18. self._max_retries = 3
  19. @staticmethod
  20. def prepare_params(**kw):
  21. request_params = {}
  22. request_params.setdefault('allow_redirects', False)
  23. request_params.setdefault('timeout', (kw.pop('timeout', None) or 10))
  24. for key, val in kw.items():
  25. if key != 'headers' and key in request_params:
  26. request_params.update({key: val})
  27. else:
  28. request_params.setdefault(key, val)
  29. request_headers = (kw.pop('headers', None) or headers)
  30. for key, val in request_headers.items():
  31. if key in request_headers:
  32. request_headers.update({key: val})
  33. else:
  34. request_headers.setdefault(key, val)
  35. request_params.setdefault('headers', request_headers)
  36. return request_params
  37. def _requests_by_get(self, url, **kw):
  38. disable_debug_log = kw.pop('disable_debug_log', True)
  39. request_params = self.prepare_params(**kw)
  40. response = Response()
  41. response.encoding = 'utf-8'
  42. response.status_code = 10001
  43. retries = 0
  44. while retries < self._max_retries:
  45. try:
  46. response = self._session.get(url, **request_params)
  47. # 解决重定向的网站
  48. if response.status_code in REDIRECT_STATI:
  49. request_params.update({'allow_redirects': True})
  50. continue
  51. response.encoding = response.apparent_encoding
  52. if response.encoding in SPECIAL_ENCODINGS:
  53. response.encoding = 'utf-8'
  54. break
  55. except requests.exceptions.SSLError as e:
  56. response.reason = e.__class__.__name__
  57. if 'verify' not in request_params:
  58. request_params.setdefault('verify', False)
  59. else:
  60. if 'verify' in request_params:
  61. del request_params['verify']
  62. url = url.replace('https', 'http')
  63. retries += 1
  64. except requests.RequestException as e:
  65. response.reason = e.__class__.__name__
  66. retries += 1
  67. if not disable_debug_log:
  68. t_name = threading.currentThread().getName()
  69. logger.debug(f'<{t_name}> - {url} - 响应 - {response}')
  70. return response
  71. def get(self, url, **kw):
  72. """
  73. 网络请求
  74. :param url: 访问地址
  75. :param kw: requests.GET请求参数
  76. :return: 响应对象
  77. """
  78. return self._requests_by_get(url, **kw)
  79. @property
  80. def _session(self):
  81. retry = Retry(
  82. connect=self._connect,
  83. backoff_factor=self._backoff_factor
  84. )
  85. adapter = HTTPAdapter(max_retries=retry)
  86. session = requests.Session()
  87. session.mount('http://', adapter)
  88. session.mount('https://', adapter)
  89. return session