|
@@ -1,23 +1,26 @@
|
|
import requests
|
|
import requests
|
|
import urllib3
|
|
import urllib3
|
|
-from requests.models import Response
|
|
|
|
|
|
+from requests.models import Response, REDIRECT_STATI
|
|
|
|
|
|
from config.load import headers
|
|
from config.load import headers
|
|
-from settings import SPECIAL_ENCODINGS
|
|
|
|
|
|
|
|
urllib3.disable_warnings()
|
|
urllib3.disable_warnings()
|
|
|
|
|
|
|
|
+# Apparent encodings that are overridden: a response detected as one of these is decoded as UTF-8 instead.
|
|
|
|
+SPECIAL_ENCODINGS = [
|
|
|
|
+ 'Windows-1254'
|
|
|
|
+]
|
|
|
|
+
|
|
|
|
|
|
class Downloader:
|
|
class Downloader:
|
|
|
|
|
|
def __init__(self):
|
|
def __init__(self):
|
|
self.timeout = 15
|
|
self.timeout = 15
|
|
- self.allow_redirects = False
|
|
|
|
self.max_retries = 3
|
|
self.max_retries = 3
|
|
|
|
|
|
def prepare_params(self, **kw):
|
|
def prepare_params(self, **kw):
|
|
request_params = {}
|
|
request_params = {}
|
|
- request_params.setdefault('allow_redirects', self.allow_redirects)
|
|
|
|
|
|
+ request_params.setdefault('allow_redirects', False)
|
|
request_params.setdefault('timeout', self.timeout)
|
|
request_params.setdefault('timeout', self.timeout)
|
|
for key, val in kw.items():
|
|
for key, val in kw.items():
|
|
if key != 'headers' and key in request_params:
|
|
if key != 'headers' and key in request_params:
|
|
@@ -42,6 +45,10 @@ class Downloader:
|
|
while retries < self.max_retries:
|
|
while retries < self.max_retries:
|
|
try:
|
|
try:
|
|
response = requests.get(url, **request_params)
|
|
response = requests.get(url, **request_params)
|
|
|
|
+ # Handle sites that answer with a redirect: drop the allow_redirects override and request again.
|
|
|
|
+ if response.status_code in REDIRECT_STATI:
|
|
|
|
+ request_params.pop('allow_redirects')
|
|
|
|
+ continue
|
|
response.encoding = response.apparent_encoding
|
|
response.encoding = response.apparent_encoding
|
|
if response.encoding in SPECIAL_ENCODINGS:
|
|
if response.encoding in SPECIAL_ENCODINGS:
|
|
response.encoding = 'utf-8'
|
|
response.encoding = 'utf-8'
|