Эх сурвалжийг харах

更新下载器访问失败时返回的文本流和编码

dongzhaorui 1 жил өмнө
parent
commit
a98172193e

+ 9 - 6
find_source/crawler/download.py

@@ -1,10 +1,5 @@
 import threading
 import threading
 
 
-try:
-    import chardet
-except ImportError:
-    import charset_normalizer as chardet
-
 import requests
 import requests
 import urllib3
 import urllib3
 from loguru import logger
 from loguru import logger
@@ -15,6 +10,11 @@ from urllib3.util.retry import Retry
 
 
 from config.load import headers
 from config.load import headers
 
 
+try:
+    import chardet
+except ImportError:
+    import charset_normalizer as chardet
+
 urllib3.disable_warnings()
 urllib3.disable_warnings()
 
 
 '''特殊编码需要解码'''
 '''特殊编码需要解码'''
@@ -83,7 +83,6 @@ class Downloader:
     def _requests_by_get(self, url, **kw):
     def _requests_by_get(self, url, **kw):
         request_params = self.prepare_params(**kw)
         request_params = self.prepare_params(**kw)
         response = Response()
         response = Response()
-        response.encoding = 'utf-8'
         response.status_code = 10001
         response.status_code = 10001
         ssl_retries = 2  # ssl证书验证,错误重试次数
         ssl_retries = 2  # ssl证书验证,错误重试次数
         while True:
         while True:
@@ -114,6 +113,10 @@ class Downloader:
             t_name = threading.currentThread().getName()
             t_name = threading.currentThread().getName()
             logger.debug(f'<{t_name}-Response> {response.status_code} - {url}')
             logger.debug(f'<{t_name}-Response> {response.status_code} - {url}')
 
 
+        if response.status_code == 10001:
+            response._content = b""
+            response.encoding = 'utf-8'
+
         return response
         return response
 
 
     def get(self, url, **kw):
     def get(self, url, **kw):