download.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. import requests
  2. import urllib3
  3. from requests.models import Response
  4. from config.load import headers
  5. urllib3.disable_warnings()
  6. class Downloader:
  7. @staticmethod
  8. def _requests_by_get(url, max_retries=3, **kw):
  9. request_params = {}
  10. request_params.setdefault('allow_redirects', False)
  11. request_params.setdefault('timeout', 15)
  12. request_params.setdefault('headers', headers)
  13. for key, val in kw.items():
  14. if key in request_params:
  15. request_params.update({key: val})
  16. else:
  17. request_params.setdefault(key, val)
  18. retries = 0
  19. response = Response()
  20. response.encoding = 'utf-8'
  21. response.status_code = 10400
  22. setattr(response, 'loop_times', 1) # 异常通信频次默认值1
  23. while retries < max_retries:
  24. try:
  25. response = requests.get(url, **request_params)
  26. response.encoding = response.apparent_encoding
  27. setattr(response, 'loop_times', 0) # 正常通信频次默认值0
  28. break
  29. except requests.exceptions.SSLError as e:
  30. response.reason = e.__class__.__name__
  31. if 'verify' not in request_params:
  32. request_params.setdefault('verify', True)
  33. else:
  34. url = url.replace('https', 'http')
  35. retries += 1
  36. except requests.RequestException as e:
  37. response.reason = e.__class__.__name__
  38. retries += 1
  39. return response
  40. def get(self, url, **kw):
  41. """
  42. 网络请求
  43. :param url: 访问地址
  44. :param kw: 额外请求参数
  45. :return: 响应对象
  46. """
  47. return self._requests_by_get(url, **kw)
  48. if __name__ == '__main__':
  49. # r = Downloader().get('https://news.lzu.edu.cn/c/202204/88967.html')
  50. r = Downloader().get('http://www.xxls.gov.cn/')
  51. print(r, r.loop_times)