瀏覽代碼

更新下载器访问失败时返回的文本流和编码

dongzhaorui 1 年之前
父節點
當前提交
cbf8372f8c
共有 1 個文件被更改,包括 16 次插入和 8 次删除
  1. 16 8
      find_source/crawler/download.py

+ 16 - 8
find_source/crawler/download.py

@@ -82,8 +82,10 @@ class Downloader:
 
     def _requests_by_get(self, url, **kw):
         request_params = self.prepare_params(**kw)
-        response = Response()
-        response.status_code = 10001
+
+        response = None  # 请求响应
+        reason = ""  # 错误原因
+
         ssl_retries = 2  # ssl证书验证,错误重试次数
         while True:
             try:
@@ -92,31 +94,37 @@ class Downloader:
                 if response.status_code in REDIRECT_STATI:
                     request_params.update({'allow_redirects': True})
                     continue
+
                 response.encoding = self.apparent_encoding(response)
                 break
             except requests.exceptions.SSLError as e:
-                response.reason = e.__class__.__name__
+                reason = e.__class__.__name__
                 if 'verify' not in request_params:
                     request_params.setdefault('verify', False)
                 else:
                     if 'verify' in request_params:
                         del request_params['verify']
                     url = url.replace('https', 'http')
+
                 if ssl_retries <= 0:
                     break
+
                 ssl_retries -= 1
             except requests.RequestException as e:
-                response.reason = e.__class__.__name__
+                reason = e.__class__.__name__
                 break
 
+        if response is None:
+            response = Response()
+            response.status_code = 10001
+            response.encoding = 'utf-8'  # 设置默认编码
+            response._content = None  # 设置默认响应文本流
+            response.reason = reason
+
         if self.disable_debug_log:
             t_name = threading.currentThread().getName()
             logger.debug(f'<{t_name}-Response> {response.status_code} - {url}')
 
-        if response.status_code == 10001:
-            response._content = b""
-            response.encoding = 'utf-8'
-
         return response
 
     def get(self, url, **kw):