|
@@ -1,6 +1,8 @@
|
|
|
import requests
|
|
|
import urllib3
|
|
|
+from requests.adapters import HTTPAdapter
|
|
|
from requests.models import Response, REDIRECT_STATI
|
|
|
+from urllib3.util.retry import Retry
|
|
|
|
|
|
from config.load import headers
|
|
|
|
|
@@ -14,20 +16,23 @@ SPECIAL_ENCODINGS = [
|
|
|
|
|
|
class Downloader:
|
|
|
|
|
|
- def __init__(self):
|
|
|
- self.timeout = 15
|
|
|
- self.max_retries = 3
|
|
|
+ def __init__(self, connect=5, backoff_factor=0.1):
|
|
|
+ self._connect = connect  # handed to Retry(connect=...) when a session is built
|
|
|
+ self._backoff_factor = backoff_factor  # caller-supplied retry backoff factor
|
|
|
+ self._max_retries = 3  # bound checked by the `while retries < self._max_retries` loop
|
|
|
|
|
|
- def prepare_params(self, **kw):
|
|
|
+ @staticmethod
|
|
|
+ def prepare_params(**kw):
|
|
|
request_params = {}
|
|
|
request_params.setdefault('allow_redirects', False)
|
|
|
- request_params.setdefault('timeout', self.timeout)
|
|
|
+ request_params.setdefault('timeout', (kw.pop('timeout', None) or 10))
|
|
|
for key, val in kw.items():
|
|
|
if key != 'headers' and key in request_params:
|
|
|
request_params.update({key: val})
|
|
|
else:
|
|
|
request_params.setdefault(key, val)
|
|
|
- request_headers = (kw.get('headers') or headers)
|
|
|
+
|
|
|
+ request_headers = (kw.pop('headers', None) or headers)
|
|
|
for key, val in request_headers.items():
|
|
|
if key in request_headers:
|
|
|
request_headers.update({key: val})
|
|
@@ -40,11 +45,11 @@ class Downloader:
|
|
|
request_params = self.prepare_params(**kw)
|
|
|
response = Response()
|
|
|
response.encoding = 'utf-8'
|
|
|
- response.status_code = 10400
|
|
|
+ response.status_code = 10001
|
|
|
retries = 0
|
|
|
- while retries < self.max_retries:
|
|
|
+ while retries < self._max_retries:
|
|
|
try:
|
|
|
- response = requests.get(url, **request_params)
|
|
|
+ response = self._session.get(url, **request_params)
|
|
|
# 解决重定向的网站
|
|
|
if response.status_code in REDIRECT_STATI:
|
|
|
request_params.pop('allow_redirects')
|
|
@@ -56,8 +61,10 @@ class Downloader:
|
|
|
except requests.exceptions.SSLError as e:
|
|
|
response.reason = e.__class__.__name__
|
|
|
if 'verify' not in request_params:
|
|
|
- request_params.setdefault('verify', True)
|
|
|
+ request_params.setdefault('verify', False)
|
|
|
else:
|
|
|
+ if 'verify' in request_params:
|
|
|
+ del request_params['verify']
|
|
|
url = url.replace('https', 'http')
|
|
|
retries += 1
|
|
|
except requests.RequestException as e:
|
|
@@ -74,3 +81,15 @@ class Downloader:
|
|
|
:return: 响应对象
|
|
|
"""
|
|
|
return self._requests_by_get(url, **kw)
|
|
|
+
|
|
|
+    @property
+    def _session(self):
+        """Return a new requests.Session with retrying adapters on http(s).
+
+        Built on every access; honours self._connect and self._backoff_factor."""
+        retry = Retry(connect=self._connect, backoff_factor=self._backoff_factor)
+        adapter = HTTPAdapter(max_retries=retry)
+        session = requests.Session()
+        session.mount('http://', adapter)
+        session.mount('https://', adapter)
+        return session
|