# download.py (1.8 KB)
  1. import requests
  2. import urllib3
  3. from requests.models import Response
  4. from config.load import headers
  5. urllib3.disable_warnings()
  6. class Downloader:
  7. @staticmethod
  8. def _requests_by_get(url, max_retries=3, **kw):
  9. request_params = {}
  10. request_params.setdefault('allow_redirects', False)
  11. request_params.setdefault('timeout', 15)
  12. request_params.setdefault('headers', headers)
  13. for key, val in kw.items():
  14. if key in request_params:
  15. request_params.update({key: val})
  16. else:
  17. request_params.setdefault(key, val)
  18. retries = 0
  19. response = Response()
  20. response.encoding = 'utf-8'
  21. response.status_code = 10400
  22. setattr(response, 'loop_times', 1) # 异常通信频次默认值1
  23. while retries < max_retries:
  24. try:
  25. response = requests.get(url, **request_params)
  26. setattr(response, 'loop_times', 0) # 正常通信频次默认值0
  27. break
  28. except requests.exceptions.SSLError as e:
  29. response.reason = e.__class__.__name__
  30. if 'verify' not in request_params:
  31. request_params.setdefault('verify', True)
  32. else:
  33. url = url.replace('https', 'http')
  34. retries += 1
  35. except requests.RequestException as e:
  36. response.reason = e.__class__.__name__
  37. retries += 1
  38. return response
  39. def get(self, url, **kw):
  40. """
  41. 网络请求
  42. :param url: 访问地址
  43. :param kw: 额外请求参数
  44. :return: 响应对象
  45. """
  46. return self._requests_by_get(url, **kw)
# Example usage (kept commented out):
# if __name__ == '__main__':
#     r = Downloader().get('https://news.lzu.edu.cn/c/202204/88967.html')
#     print(r, r.loop_times)