Jelajahi Sumber

下载模块

dongzhaorui 3 tahun lalu
induk
melakukan
04d6bb8dcb
1 mengubah file dengan 67 tambahan dan 0 penghapusan
  1. 67 0
      find_source/crawler/downloader.py

+ 67 - 0
find_source/crawler/downloader.py

@@ -0,0 +1,67 @@
+import requests
+import urllib3
+from requests.models import Response
+
+from config.load import headers
+
+# Silence urllib3's warnings for the whole process (module-level side effect
+# that takes place as soon as this module is imported).
+urllib3.disable_warnings()
+
+
+class Downloader:
+    """HTTP GET helper that retries and reports request errors on the response."""
+
+    @staticmethod
+    def _requests_by_get(url, max_retries=3, **kw):
+        """
+        Send a GET request, retrying when ``requests`` raises an error.
+
+        :param url: address to request
+        :param max_retries: maximum number of attempts
+        :param kw: extra keyword arguments forwarded to ``requests.get``;
+            ``headers`` is merged over the configured default headers
+        :return: the response returned by ``requests.get`` with
+            ``loop_times`` set to 0 on success; otherwise a placeholder
+            ``Response`` with ``loop_times`` set to 1 whose ``status_code``
+            and ``reason`` describe the last error
+        """
+        # Merge into a copy: mutating the imported defaults would leak one
+        # call's headers into every later request.
+        merged_headers = dict(headers)
+        extra_headers = kw.pop('headers', None) or {}
+        if any(str(name).lower() == 'user-agent' for name in extra_headers):
+            # A caller-supplied User-Agent replaces the default one, whatever
+            # capitalisation either side uses.
+            stale = [n for n in merged_headers if str(n).lower() == 'user-agent']
+            for name in stale:
+                del merged_headers[name]
+        merged_headers.update(extra_headers)
+
+        request_params = {
+            'allow_redirects': False,
+            'timeout': 15,
+            'headers': merged_headers,
+        }
+        # Remaining caller arguments override the defaults above.
+        request_params.update(kw)
+
+        # Placeholder returned when every attempt fails.
+        response = Response()
+        response.encoding = 'utf-8'
+        response.loop_times = 1  # failed-communication marker, default 1
+        for _ in range(max_retries):
+            try:
+                result = requests.get(url, **request_params)
+            except requests.RequestException as e:
+                # Many errors (SSL and connection failures among them) carry
+                # no response, so guard before reading its status code.
+                if e.response is not None:
+                    response.status_code = e.response.status_code
+                else:
+                    # No HTTP response was received; 10400 is the
+                    # out-of-band status used for that case.
+                    response.status_code = 10400
+                response.reason = e.__class__.__name__
+                if isinstance(e, requests.exceptions.SSLError):
+                    # NOTE(review): True is already the requests default, so
+                    # this does not change the retry. If the intent was to
+                    # retry without certificate verification this should be
+                    # False - confirm before changing.
+                    request_params.setdefault('verify', True)
+            else:
+                response = result
+                response.loop_times = 0  # successful-communication marker
+                break
+        return response
+
+    def get(self, url, **kw):
+        """
+        Perform a network GET request.
+
+        :param url: address to request
+        :param kw: extra request parameters
+        :return: response object
+        """
+        return self._requests_by_get(url, **kw)
+
+
+# if __name__ == '__main__':
+#     r = Downloader().get('http://httpbin.org/status/401')
+#     print(r, r.loop_times)