# download.py
  1. import requests
  2. import urllib3
  3. from requests.models import Response, REDIRECT_STATI
  4. from config.load import headers
  5. urllib3.disable_warnings()
  6. '''特殊编码需要解码'''
  7. SPECIAL_ENCODINGS = [
  8. 'Windows-1254'
  9. ]
  10. class Downloader:
  11. def __init__(self):
  12. self.timeout = 15
  13. self.max_retries = 3
  14. def prepare_params(self, **kw):
  15. request_params = {}
  16. request_params.setdefault('allow_redirects', False)
  17. request_params.setdefault('timeout', self.timeout)
  18. for key, val in kw.items():
  19. if key != 'headers' and key in request_params:
  20. request_params.update({key: val})
  21. else:
  22. request_params.setdefault(key, val)
  23. request_headers = (kw.get('headers') or headers)
  24. for key, val in request_headers.items():
  25. if key in request_headers:
  26. request_headers.update({key: val})
  27. else:
  28. request_headers.setdefault(key, val)
  29. request_params.setdefault('headers', request_headers)
  30. return request_params
  31. def _requests_by_get(self, url, **kw):
  32. request_params = self.prepare_params(**kw)
  33. response = Response()
  34. response.encoding = 'utf-8'
  35. response.status_code = 10400
  36. retries = 0
  37. while retries < self.max_retries:
  38. try:
  39. response = requests.get(url, **request_params)
  40. # 解决重定向的网站
  41. if response.status_code in REDIRECT_STATI:
  42. request_params.pop('allow_redirects')
  43. continue
  44. response.encoding = response.apparent_encoding
  45. if response.encoding in SPECIAL_ENCODINGS:
  46. response.encoding = 'utf-8'
  47. break
  48. except requests.exceptions.SSLError as e:
  49. response.reason = e.__class__.__name__
  50. if 'verify' not in request_params:
  51. request_params.setdefault('verify', True)
  52. else:
  53. url = url.replace('https', 'http')
  54. retries += 1
  55. except requests.RequestException as e:
  56. response.reason = e.__class__.__name__
  57. retries += 1
  58. return response
  59. def get(self, url, **kw):
  60. """
  61. 网络请求
  62. :param url: 访问地址
  63. :param kw: requests.GET请求参数
  64. :return: 响应对象
  65. """
  66. return self._requests_by_get(url, **kw)