Prechádzať zdrojové kódy

重定义HTTPAdapter

dongzhaorui pred 3 rokmi
rodič
commit
116b22cfde
1 zmenený súbor, 29 pridaní a 10 odobraní
  1. 29 10
      find_source/crawler/download.py

+ 29 - 10
find_source/crawler/download.py

@@ -1,6 +1,8 @@
 import requests
 import urllib3
+from requests.adapters import HTTPAdapter
 from requests.models import Response, REDIRECT_STATI
+from urllib3.util.retry import Retry
 
 from config.load import headers
 
@@ -14,20 +16,23 @@ SPECIAL_ENCODINGS = [
 
 class Downloader:
 
-    def __init__(self):
-        self.timeout = 15
-        self.max_retries = 3
+    def __init__(self, connect=5, backoff_factor=0.1):
+        self._connect = connect
+        self._backoff_factor = backoff_factor
+        self._max_retries = 3
 
-    def prepare_params(self, **kw):
+    @staticmethod
+    def prepare_params(**kw):
         request_params = {}
         request_params.setdefault('allow_redirects', False)
-        request_params.setdefault('timeout', self.timeout)
+        request_params.setdefault('timeout', (kw.pop('timeout', None) or 10))
         for key, val in kw.items():
             if key != 'headers' and key in request_params:
                 request_params.update({key: val})
             else:
                 request_params.setdefault(key, val)
-        request_headers = (kw.get('headers') or headers)
+
+        request_headers = (kw.pop('headers', None) or headers)
         for key, val in request_headers.items():
             if key in request_headers:
                 request_headers.update({key: val})
@@ -40,11 +45,11 @@ class Downloader:
         request_params = self.prepare_params(**kw)
         response = Response()
         response.encoding = 'utf-8'
-        response.status_code = 10400
+        response.status_code = 10001
         retries = 0
-        while retries < self.max_retries:
+        while retries < self._max_retries:
             try:
-                response = requests.get(url, **request_params)
+                response = self._session.get(url, **request_params)
                 # 解决重定向的网站
                 if response.status_code in REDIRECT_STATI:
                     request_params.pop('allow_redirects')
@@ -56,8 +61,10 @@ class Downloader:
             except requests.exceptions.SSLError as e:
                 response.reason = e.__class__.__name__
                 if 'verify' not in request_params:
-                    request_params.setdefault('verify', True)
+                    request_params.setdefault('verify', False)
                 else:
+                    if 'verify' in request_params:
+                        del request_params['verify']
                     url = url.replace('https', 'http')
                 retries += 1
             except requests.RequestException as e:
@@ -74,3 +81,15 @@ class Downloader:
         :return: 响应对象
         """
         return self._requests_by_get(url, **kw)
+
+    @property
+    def _session(self):
+        retry = Retry(
+            connect=self._connect,
+            backoff_factor=0.1
+        )
+        adapter = HTTPAdapter(max_retries=retry)
+        session = requests.Session()
+        session.mount('http://', adapter)
+        session.mount('https://', adapter)
+        return session