|
@@ -22,13 +22,31 @@ SPECIAL_ENCODINGS = [
|
|
|
'Windows-1254',
|
|
|
'ISO-8859-1'
|
|
|
]
|
|
|
+# Connection pool size used for each Session (passed to HTTPAdapter as both
+# pool_connections and pool_maxsize).
|
|
|
+DEFAULT_POOLSIZE = 10
|
|
|
|
|
|
|
|
|
class Downloader:
|
|
|
|
|
|
- def __init__(self, max_retries=3, retry_interval=0.1):
|
|
|
- self._max_retries = max_retries
|
|
|
- self._backoff_factor = retry_interval
|
|
|
+ def __init__(self, max_retries=3, retry_interval=0, **kwargs):
+ # Build one requests.Session for this downloader and mount a single
+ # HTTPAdapter (retry policy + connection pool) on both URL schemes.
+ # Only 'disable_debug_log' is read from kwargs here; any other keyword
+ # arguments are not used in this method.
|
|
|
+ self._max_retries = max_retries # maximum number of retries when a request fails
|
|
|
+ self._backoff_factor = retry_interval # back-off factor applied between retries
|
|
|
+ self.disable_debug_log = kwargs.pop('disable_debug_log', False)
|
|
|
+
|
|
|
+ self.session = requests.Session()
|
|
|
+ # Adapter - retry object
|
|
|
+ retry = Retry(
|
|
|
+ total=self._max_retries,
|
|
|
+ backoff_factor=self._backoff_factor
|
|
|
+ )
|
|
|
+ # Adapter (pool sizes come from the module-level DEFAULT_POOLSIZE)
|
|
|
+ adapter = HTTPAdapter(
|
|
|
+ pool_connections=DEFAULT_POOLSIZE,
|
|
|
+ pool_maxsize=DEFAULT_POOLSIZE,
|
|
|
+ max_retries=retry
|
|
|
+ )
|
|
|
+ self.session.mount('http://', adapter)
|
|
|
+ self.session.mount('https://', adapter)
|
|
|
|
|
|
@staticmethod
|
|
|
def prepare_params(**kw):
|
|
@@ -63,16 +81,14 @@ class Downloader:
|
|
|
return encoding
|
|
|
|
|
|
def _requests_by_get(self, url, **kw):
|
|
|
- max_retries = (kw.pop('max_retries', 3))
|
|
|
- disable_debug_log = kw.pop('disable_debug_log', True)
|
|
|
request_params = self.prepare_params(**kw)
|
|
|
response = Response()
|
|
|
response.encoding = 'utf-8'
|
|
|
response.status_code = 10001
|
|
|
- retries = 0
|
|
|
- while retries < max_retries:
|
|
|
+ ssl_retries = 2 # ssl证书验证,错误重试次数
|
|
|
+ while True:
|
|
|
try:
|
|
|
- response = self._session.get(url, **request_params)
|
|
|
+ response = self.session.get(url, **request_params)
|
|
|
# 解决重定向的网站
|
|
|
if response.status_code in REDIRECT_STATI:
|
|
|
request_params.update({'allow_redirects': True})
|
|
@@ -87,13 +103,17 @@ class Downloader:
|
|
|
if 'verify' in request_params:
|
|
|
del request_params['verify']
|
|
|
url = url.replace('https', 'http')
|
|
|
+ if ssl_retries <= 0:
|
|
|
+ break
|
|
|
+ ssl_retries -= 1
|
|
|
except requests.RequestException as e:
|
|
|
response.reason = e.__class__.__name__
|
|
|
- finally:
|
|
|
- retries += 1
|
|
|
- if not disable_debug_log:
|
|
|
+ break
|
|
|
+
|
|
|
+ # Emit the debug line only when debug logging has NOT been disabled; the
+ # flag is a "disable" switch, so a truthy value must suppress the log.
+ if not self.disable_debug_log:
|
|
|
t_name = threading.currentThread().getName()
|
|
|
- logger.debug(f'<{t_name}> - {url} - 响应 - {response}')
|
|
|
+ logger.debug(f'<{t_name}-Response> {response.status_code} - {url}')
|
|
|
+
|
|
|
return response
|
|
|
|
|
|
def get(self, url, **kw):
|
|
@@ -106,18 +126,6 @@ class Downloader:
|
|
|
"""
|
|
|
return self._requests_by_get(url, **kw)
|
|
|
|
|
|
- @property
|
|
|
- def _session(self):
|
|
|
- retry = Retry(
|
|
|
- total=self._max_retries,
|
|
|
- backoff_factor=self._backoff_factor
|
|
|
- )
|
|
|
- adapter = HTTPAdapter(max_retries=retry)
|
|
|
- session = requests.Session()
|
|
|
- session.mount('http://', adapter)
|
|
|
- session.mount('https://', adapter)
|
|
|
- return session
|
|
|
-
|
|
|
|
|
|
class RenderDownloader(Downloader):
|
|
|
|