Browse Source

设置全局的请求实例控制属性

dongzhaorui 3 years ago
parent
commit
338a98a41c
1 changed file with 22 additions and 18 deletions
  1. 22 18
      find_source/crawler/download.py

+ 22 - 18
find_source/crawler/download.py

@@ -9,28 +9,38 @@ urllib3.disable_warnings()
 
 class Downloader:
 
-    @staticmethod
-    def _requests_by_get(url, max_retries=3, **kw):
+    def __init__(self):
+        self.timeout = 15
+        self.allow_redirects = False
+        self.max_retries = 3
+
+    def prepare_params(self, **kw):
         request_params = {}
-        request_params.setdefault('allow_redirects', False)
-        request_params.setdefault('timeout', 15)
-        request_params.setdefault('headers', headers)
+        request_params.setdefault('allow_redirects', self.allow_redirects)
+        request_params.setdefault('timeout', self.timeout)
         for key, val in kw.items():
-            if key in request_params:
+            if key != 'headers' and key in request_params:
                 request_params.update({key: val})
             else:
                 request_params.setdefault(key, val)
+        request_headers = (kw.get('headers') or headers)
+        for key, val in request_headers.items():
+            if key in request_headers:
+                request_headers.update({key: val})
+            else:
+                request_headers.setdefault(key, val)
+        request_params.setdefault('headers', request_headers)
+        return request_params
 
-        retries = 0
+    def _requests_by_get(self, url, **kw):
+        request_params = self.prepare_params(**kw)
         response = Response()
         response.encoding = 'utf-8'
         response.status_code = 10400
-        setattr(response, 'loop_times', 1)  # 异常通信频次默认值1
-        while retries < max_retries:
+        retries = 0
+        while retries < self.max_retries:
             try:
                 response = requests.get(url, **request_params)
-                response.encoding = response.apparent_encoding
-                setattr(response, 'loop_times', 0)  # 正常通信频次默认值0
                 break
             except requests.exceptions.SSLError as e:
                 response.reason = e.__class__.__name__
@@ -49,13 +59,7 @@ class Downloader:
         网络请求
 
         :param url: 访问地址
-        :param kw: 额外请求参数
+        :param kw: requests.GET请求参数
         :return: 响应对象
         """
         return self._requests_by_get(url, **kw)
-
-
-# if __name__ == '__main__':
-    # r = Downloader().get('https://news.lzu.edu.cn/c/202204/88967.html')
-    # r = Downloader().get('http://www.xxls.gov.cn/')
-    # print(r, r.loop_times)