download.py 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. import requests
  2. import urllib3
  3. from requests.models import Response
  4. from config.load import headers
  5. urllib3.disable_warnings()
  6. class Downloader:
  7. @staticmethod
  8. def _requests_by_get(url, max_retries=3, **kw):
  9. request_params = {}
  10. request_params.setdefault('allow_redirects', False)
  11. request_params.setdefault('timeout', 15)
  12. request_params.setdefault('headers', headers)
  13. for key, val in kw.items():
  14. if key == 'headers':
  15. for k, v in val.items():
  16. if str(k).lower() == 'user-agent':
  17. del headers['User-Agent']
  18. headers.update(val)
  19. request_params.update({'headers': headers})
  20. elif key in request_params:
  21. request_params.update({key: val})
  22. else:
  23. request_params.setdefault(key, val)
  24. retries = 0
  25. response = Response()
  26. response.encoding = 'utf-8'
  27. response.status_code = 10400
  28. setattr(response, 'loop_times', 1) # 异常通信频次默认值1
  29. while retries < max_retries:
  30. try:
  31. response = requests.get(url, **request_params)
  32. setattr(response, 'loop_times', 0) # 正常通信频次默认值0
  33. break
  34. except requests.exceptions.SSLError as e:
  35. response.reason = e.__class__.__name__
  36. if 'verify' not in request_params:
  37. request_params.setdefault('verify', True)
  38. else:
  39. url = url.replace('https', 'http')
  40. retries += 1
  41. except requests.RequestException as e:
  42. response.reason = e.__class__.__name__
  43. retries += 1
  44. return response
  45. def get(self, url, **kw):
  46. """
  47. 网络请求
  48. :param url: 访问地址
  49. :param kw: 额外请求参数
  50. :return: 响应对象
  51. """
  52. return self._requests_by_get(url, **kw)
  53. # if __name__ == '__main__':
  54. # r = Downloader().get('https://news.lzu.edu.cn/c/202204/88967.html')
  55. # print(r, r.loop_times)