Преглед изворног кода

更新下载器访问失败时返回的文本流和编码

dongzhaorui пре 1 година
родитељ
комит
a98172193e
1 измењен фајл, 9 додато и 6 уклоњено
  1. 9 6
      find_source/crawler/download.py

+ 9 - 6
find_source/crawler/download.py

@@ -1,10 +1,5 @@
 import threading
 
-try:
-    import chardet
-except ImportError:
-    import charset_normalizer as chardet
-
 import requests
 import urllib3
 from loguru import logger
@@ -15,6 +10,11 @@ from urllib3.util.retry import Retry
 
 from config.load import headers
 
+try:
+    import chardet
+except ImportError:
+    import charset_normalizer as chardet
+
 urllib3.disable_warnings()
 
 '''特殊编码需要解码'''
@@ -83,7 +83,6 @@ class Downloader:
     def _requests_by_get(self, url, **kw):
         request_params = self.prepare_params(**kw)
         response = Response()
-        response.encoding = 'utf-8'
         response.status_code = 10001
         ssl_retries = 2  # ssl证书验证,错误重试次数
         while True:
@@ -114,6 +113,10 @@ class Downloader:
             t_name = threading.currentThread().getName()
             logger.debug(f'<{t_name}-Response> {response.status_code} - {url}')
 
+        if response.status_code == 10001:
+            response._content = b""
+            response.encoding = 'utf-8'
+
         return response
 
     def get(self, url, **kw):