|
@@ -37,9 +37,11 @@ def clear_file_type_suffix(filename: str, filetype: str):
|
|
|
|
|
|
class AttachmentDownloader:
|
|
|
|
|
|
- def __init__(self):
|
|
|
+ def __init__(self, max_retries=3):
|
|
|
self.dir_name = "file"
|
|
|
|
|
|
+ self._max_retries = max_retries
|
|
|
+
|
|
|
def create_file(self, filename, filetype):
|
|
|
os.makedirs(self.dir_name, mode=0o777, exist_ok=True)
|
|
|
file = "{filename}.{filetype}".format(
|
|
@@ -110,9 +112,9 @@ class AttachmentDownloader:
|
|
|
下载数据
|
|
|
|
|
|
:param str url: 文件下载地址
|
|
|
- :param proxies: 代理 {"http":"http://xxx", "https":"https://xxx"}
|
|
|
+ :param dict proxies: 代理 {"http":"http://xxx", "https":"https://xxx"}
|
|
|
:param file: 本地文件
|
|
|
- :param show_error_log: 展示错误堆栈信息日志
|
|
|
+ :param bool show_error_log: 展示错误堆栈信息日志
|
|
|
"""
|
|
|
method = kwargs.pop("method", "get")
|
|
|
|
|
@@ -123,19 +125,17 @@ class AttachmentDownloader:
|
|
|
request_kwargs.setdefault("data", kwargs.pop("data", None))
|
|
|
request_kwargs.setdefault("json", kwargs.pop("json", None))
|
|
|
request_kwargs.setdefault("cookies", kwargs.pop("cookies", None))
|
|
|
- request_kwargs.setdefault("timeout", kwargs.pop("timeout", (60,120)))
|
|
|
+ request_kwargs.setdefault("timeout", kwargs.pop("timeout", (60, 120)))
|
|
|
request_kwargs.setdefault("stream", kwargs.pop("stream", True))
|
|
|
request_kwargs.setdefault("verify", kwargs.pop("verify", False))
|
|
|
request_kwargs.setdefault("allow_redirects", kwargs.pop("allow_redirects", True))
|
|
|
|
|
|
stream = io.BytesIO()
|
|
|
- retries = 0
|
|
|
- while retries < 3:
|
|
|
+ for _ in range(self._max_retries):
|
|
|
try:
|
|
|
- with requests.request(method, url, **request_kwargs) as req:
|
|
|
- req.raise_for_status()
|
|
|
-
|
|
|
- lower_headers = {k.lower(): v for k, v in req.headers.items()}
|
|
|
+ with requests.request(method, url, **request_kwargs) as response:
|
|
|
+ response.raise_for_status()
|
|
|
+ lower_headers = {k.lower(): v for k, v in response.headers.items()}
|
|
|
content_length = lower_headers.get('content-length')
|
|
|
if content_length is not None:
|
|
|
content_length = self.calculate_size(int(content_length))
|
|
@@ -148,15 +148,15 @@ class AttachmentDownloader:
|
|
|
chunk_size = 1024 * 20 # 20KB chunks
|
|
|
downloaded_size = 0
|
|
|
with tqdm.tqdm(
|
|
|
- total=content_length,
|
|
|
- unit="B",
|
|
|
- initial=0,
|
|
|
- unit_scale=True,
|
|
|
- unit_divisor=1024, # 1M=1024Kb,单位换算
|
|
|
- ascii=True,
|
|
|
- desc=file) as bar:
|
|
|
-
|
|
|
- iter_content = req.iter_content(chunk_size=chunk_size)
|
|
|
+ total=content_length,
|
|
|
+ unit="B",
|
|
|
+ initial=0,
|
|
|
+ unit_scale=True,
|
|
|
+ unit_divisor=1024, # 1M=1024Kb,单位换算
|
|
|
+ ascii=True,
|
|
|
+ desc=file
|
|
|
+ ) as bar:
|
|
|
+ iter_content = response.iter_content(chunk_size=chunk_size)
|
|
|
if file is not None:
|
|
|
with open(file, "wb") as f:
|
|
|
for chunk in iter_content:
|
|
@@ -185,7 +185,6 @@ class AttachmentDownloader:
|
|
|
except requests.RequestException as why:
|
|
|
stream.truncate(0) # 截断流,保留前0个字节,即清空流
|
|
|
stream.seek(0) # 将位置指针移回流的开始处
|
|
|
- retries += 1
|
|
|
if show_error_log:
|
|
|
logger.exception(why)
|
|
|
|
|
@@ -256,9 +255,9 @@ class AttachmentDownloader:
|
|
|
return attachment
|
|
|
|
|
|
if len(stream) > 0:
|
|
|
- content_hash = get_sha1(stream)
|
|
|
+ hash_str = get_sha1(stream)
|
|
|
try:
|
|
|
- attachment["fid"] = "{}.{}".format(content_hash, filetype)
|
|
|
+ attachment["fid"] = "{}.{}".format(hash_str, filetype)
|
|
|
attachment["size"] = self.getsize(file)
|
|
|
attachment["ftype"] = filetype
|
|
|
attachment["url"] = "oss"
|