|
@@ -9,28 +9,38 @@ urllib3.disable_warnings()
|
|
|
|
|
|
class Downloader:
|
|
|
|
|
|
- @staticmethod
|
|
|
- def _requests_by_get(url, max_retries=3, **kw):
|
|
|
+ def __init__(self):
|
|
|
+ self.timeout = 15
|
|
|
+ self.allow_redirects = False
|
|
|
+ self.max_retries = 3
|
|
|
+
|
|
|
+ def prepare_params(self, **kw):
|
|
|
request_params = {}
|
|
|
- request_params.setdefault('allow_redirects', False)
|
|
|
- request_params.setdefault('timeout', 15)
|
|
|
- request_params.setdefault('headers', headers)
|
|
|
+ request_params.setdefault('allow_redirects', self.allow_redirects)
|
|
|
+ request_params.setdefault('timeout', self.timeout)
|
|
|
for key, val in kw.items():
|
|
|
- if key in request_params:
|
|
|
+ if key != 'headers' and key in request_params:
|
|
|
request_params.update({key: val})
|
|
|
else:
|
|
|
request_params.setdefault(key, val)
|
|
|
+ request_headers = (kw.get('headers') or headers)
|
|
|
+ for key, val in request_headers.items():
|
|
|
+ if key in request_headers:
|
|
|
+ request_headers.update({key: val})
|
|
|
+ else:
|
|
|
+ request_headers.setdefault(key, val)
|
|
|
+ request_params.setdefault('headers', request_headers)
|
|
|
+ return request_params
|
|
|
|
|
|
- retries = 0
|
|
|
+ def _requests_by_get(self, url, **kw):
|
|
|
+ request_params = self.prepare_params(**kw)
|
|
|
response = Response()
|
|
|
response.encoding = 'utf-8'
|
|
|
response.status_code = 10400
|
|
|
- setattr(response, 'loop_times', 1) # 异常通信频次默认值1
|
|
|
- while retries < max_retries:
|
|
|
+ retries = 0
|
|
|
+ while retries < self.max_retries:
|
|
|
try:
|
|
|
response = requests.get(url, **request_params)
|
|
|
- response.encoding = response.apparent_encoding
|
|
|
- setattr(response, 'loop_times', 0) # 正常通信频次默认值0
|
|
|
break
|
|
|
except requests.exceptions.SSLError as e:
|
|
|
response.reason = e.__class__.__name__
|
|
@@ -49,13 +59,7 @@ class Downloader:
|
|
|
网络请求
|
|
|
|
|
|
:param url: 访问地址
|
|
|
- :param kw: 额外请求参数
|
|
|
+ :param kw: requests.GET请求参数
|
|
|
:return: 响应对象
|
|
|
"""
|
|
|
return self._requests_by_get(url, **kw)
|
|
|
-
|
|
|
-
|
|
|
-# if __name__ == '__main__':
|
|
|
- # r = Downloader().get('https://news.lzu.edu.cn/c/202204/88967.html')
|
|
|
- # r = Downloader().get('http://www.xxls.gov.cn/')
|
|
|
- # print(r, r.loop_times)
|