download.py 3.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. import requests
  2. import urllib3
  3. from requests.adapters import HTTPAdapter
  4. from requests.models import Response, REDIRECT_STATI
  5. from urllib3.util.retry import Retry
  6. from config.load import headers
# Silence urllib3 warnings process-wide. The downloader below may retry a
# request with verify=False, which presumably would otherwise emit an
# InsecureRequestWarning on every such call.
urllib3.disable_warnings()
  8. '''特殊编码需要解码'''
  9. SPECIAL_ENCODINGS = [
  10. 'Windows-1254'
  11. ]
  12. class Downloader:
  13. def __init__(self, connect=5, backoff_factor=0.1):
  14. self._connect = connect
  15. self._backoff_factor = backoff_factor
  16. self._max_retries = 3
  17. @staticmethod
  18. def prepare_params(**kw):
  19. request_params = {}
  20. request_params.setdefault('allow_redirects', False)
  21. request_params.setdefault('timeout', (kw.pop('timeout', None) or 10))
  22. for key, val in kw.items():
  23. if key != 'headers' and key in request_params:
  24. request_params.update({key: val})
  25. else:
  26. request_params.setdefault(key, val)
  27. request_headers = (kw.pop('headers', None) or headers)
  28. for key, val in request_headers.items():
  29. if key in request_headers:
  30. request_headers.update({key: val})
  31. else:
  32. request_headers.setdefault(key, val)
  33. request_params.setdefault('headers', request_headers)
  34. return request_params
  35. def _requests_by_get(self, url, **kw):
  36. request_params = self.prepare_params(**kw)
  37. response = Response()
  38. response.encoding = 'utf-8'
  39. response.status_code = 10001
  40. retries = 0
  41. while retries < self._max_retries:
  42. try:
  43. response = self._session.get(url, **request_params)
  44. # 解决重定向的网站
  45. if response.status_code in REDIRECT_STATI:
  46. request_params.pop('allow_redirects')
  47. continue
  48. response.encoding = response.apparent_encoding
  49. if response.encoding in SPECIAL_ENCODINGS:
  50. response.encoding = 'utf-8'
  51. break
  52. except requests.exceptions.SSLError as e:
  53. response.reason = e.__class__.__name__
  54. if 'verify' not in request_params:
  55. request_params.setdefault('verify', False)
  56. else:
  57. if 'verify' in request_params:
  58. del request_params['verify']
  59. url = url.replace('https', 'http')
  60. retries += 1
  61. except requests.RequestException as e:
  62. response.reason = e.__class__.__name__
  63. retries += 1
  64. return response
  65. def get(self, url, **kw):
  66. """
  67. 网络请求
  68. :param url: 访问地址
  69. :param kw: requests.GET请求参数
  70. :return: 响应对象
  71. """
  72. return self._requests_by_get(url, **kw)
  73. @property
  74. def _session(self):
  75. retry = Retry(
  76. connect=self._connect,
  77. backoff_factor=0.1
  78. )
  79. adapter = HTTPAdapter(max_retries=retry)
  80. session = requests.Session()
  81. session.mount('http://', adapter)
  82. session.mount('https://', adapter)
  83. return session