Эх сурвалжийг харах

fixbug - 下载器(session)会话异常时阻塞问题修复

dongzhaorui 2 жил өмнө
parent
commit
55d6faa422

+ 32 - 24
find_source/crawler/download.py

@@ -22,13 +22,31 @@ SPECIAL_ENCODINGS = [
     'Windows-1254',
     'ISO-8859-1'
 ]
+'''每个Session连接池大小'''
+DEFAULT_POOLSIZE = 10
 
 
 class Downloader:
 
-    def __init__(self, max_retries=3, retry_interval=0.1):
-        self._max_retries = max_retries
-        self._backoff_factor = retry_interval
+    def __init__(self, max_retries=3, retry_interval=0, **kwargs):
+        self._max_retries = max_retries  # 请求错误时的最大重试次数
+        self._backoff_factor = retry_interval  # 重试间隔补偿系数
+        self.disable_debug_log = kwargs.pop('disable_debug_log', False)
+
+        self.session = requests.Session()
+        # 适配器 - 重试对象
+        retry = Retry(
+            total=self._max_retries,
+            backoff_factor=self._backoff_factor
+        )
+        # 适配器
+        adapter = HTTPAdapter(
+            pool_connections=DEFAULT_POOLSIZE,
+            pool_maxsize=DEFAULT_POOLSIZE,
+            max_retries=retry
+        )
+        self.session.mount('http://', adapter)
+        self.session.mount('https://', adapter)
 
     @staticmethod
     def prepare_params(**kw):
@@ -63,16 +81,14 @@ class Downloader:
         return encoding
 
     def _requests_by_get(self, url, **kw):
-        max_retries = (kw.pop('max_retries', 3))
-        disable_debug_log = kw.pop('disable_debug_log', True)
         request_params = self.prepare_params(**kw)
         response = Response()
         response.encoding = 'utf-8'
         response.status_code = 10001
-        retries = 0
-        while retries < max_retries:
+        ssl_retries = 2  # ssl证书验证,错误重试次数
+        while True:
             try:
-                response = self._session.get(url, **request_params)
+                response = self.session.get(url, **request_params)
                 # 解决重定向的网站
                 if response.status_code in REDIRECT_STATI:
                     request_params.update({'allow_redirects': True})
@@ -87,13 +103,17 @@ class Downloader:
                     if 'verify' in request_params:
                         del request_params['verify']
                     url = url.replace('https', 'http')
+                if ssl_retries <= 0:
+                    break
+                ssl_retries -= 1
             except requests.RequestException as e:
                 response.reason = e.__class__.__name__
-            finally:
-                retries += 1
-        if not disable_debug_log:
+                break
+
+        if self.disable_debug_log:
             t_name = threading.currentThread().getName()
-            logger.debug(f'<{t_name}> - {url} - 响应 - {response}')
+            logger.debug(f'<{t_name}-Response> {response.status_code} - {url}')
+
         return response
 
     def get(self, url, **kw):
@@ -106,18 +126,6 @@ class Downloader:
         """
         return self._requests_by_get(url, **kw)
 
-    @property
-    def _session(self):
-        retry = Retry(
-            total=self._max_retries,
-            backoff_factor=self._backoff_factor
-        )
-        adapter = HTTPAdapter(max_retries=retry)
-        session = requests.Session()
-        session.mount('http://', adapter)
-        session.mount('https://', adapter)
-        return session
-
 
 class RenderDownloader(Downloader):