|
@@ -16,8 +16,7 @@ import urllib3
|
|
|
|
|
|
import feapder.utils.tools as tools
|
|
import feapder.utils.tools as tools
|
|
from feapder.utils.log import log as logger
|
|
from feapder.utils.log import log as logger
|
|
-from untils.aliyun import AliYunService
|
|
|
|
-from untils.execptions import AttachmentNullError
|
|
|
|
|
|
+from feapder.utils.oss import JyOssClient, OssBucketClient
|
|
|
|
|
|
urllib3.disable_warnings()
|
|
urllib3.disable_warnings()
|
|
|
|
|
|
@@ -34,10 +33,12 @@ def clear_file_type_suffix(filename: str, filetype: str):
|
|
return filename
|
|
return filename
|
|
|
|
|
|
|
|
|
|
-class AttachmentDownloader:
|
|
|
|
|
|
+class Downloader:
|
|
|
|
|
|
def __init__(self):
|
|
def __init__(self):
|
|
self.dir_name = "file"
|
|
self.dir_name = "file"
|
|
|
|
+ # self._oss = JyOssClient()
|
|
|
|
+ self._bucket = OssBucketClient()
|
|
|
|
|
|
def create_file(self, filename, filetype):
|
|
def create_file(self, filename, filetype):
|
|
os.makedirs(self.dir_name, mode=0o777, exist_ok=True)
|
|
os.makedirs(self.dir_name, mode=0o777, exist_ok=True)
|
|
@@ -104,6 +105,22 @@ class AttachmentDownloader:
|
|
result = "{:.1f} M".format(_M)
|
|
result = "{:.1f} M".format(_M)
|
|
return result
|
|
return result
|
|
|
|
|
|
|
|
@staticmethod
def read_pdf_by_chunks(f, chunk_size=1024):
    """
    Inspect the head of a downloaded file and decide whether to keep it.

    Only the first ``chunk_size`` bytes are read; despite the name, the
    file is not iterated chunk by chunk.

    :param f: path of the local file to inspect (handed to ``open``)
    :param int chunk_size: number of leading bytes to read
    :return: ``False`` if the head contains both
        ``<</Names <</Dests 4 0 R>>`` and ``SourceModified``, if it
        contains ``doctypehtml`` without ``%PDF``, or if the file cannot
        be read; ``True`` in every other case
    """
    try:
        with open(f, 'rb') as file:
            chunk = file.read(chunk_size)
    except Exception:
        # Best-effort check: a file that cannot be opened/read is rejected
        # instead of propagating the error to the caller.
        return False

    # Markers are searched in the textual repr of the bytes (computed once),
    # which is what the membership tests have always operated on.
    head = str(chunk)

    if "<</Names <</Dests 4 0 R>>" in head and "SourceModified" in head:
        return False

    # Accept anything that is not flagged as HTML; an HTML-flagged head is
    # accepted only when it also carries a PDF signature.
    return "doctypehtml" not in head or "%PDF" in head
|
|
|
|
+
|
|
def fetch_data(self, url, proxies=None, file=None, show_error_log=False, **kwargs):
|
|
def fetch_data(self, url, proxies=None, file=None, show_error_log=False, **kwargs):
|
|
"""
|
|
"""
|
|
下载数据
|
|
下载数据
|
|
@@ -198,6 +215,7 @@ class AttachmentDownloader:
|
|
:param str filetype: 文件类型
|
|
:param str filetype: 文件类型
|
|
:param str url: 文件下载地址
|
|
:param str url: 文件下载地址
|
|
"""
|
|
"""
|
|
|
|
+ gzip = kwargs.pop("gzip", False)
|
|
stream = self.fetch_data(url, file=None, **kwargs)
|
|
stream = self.fetch_data(url, file=None, **kwargs)
|
|
attachment = {
|
|
attachment = {
|
|
"filename": "{}.{}".format(filename, filetype),
|
|
"filename": "{}.{}".format(filename, filetype),
|
|
@@ -210,7 +228,8 @@ class AttachmentDownloader:
|
|
attachment["fid"] = "{}.{}".format(fid, filetype)
|
|
attachment["fid"] = "{}.{}".format(fid, filetype)
|
|
attachment["size"] = self.getsize(stream)
|
|
attachment["size"] = self.getsize(stream)
|
|
attachment["url"] = "oss"
|
|
attachment["url"] = "oss"
|
|
- AliYunService().push_oss_from_stream(attachment["fid"], stream)
|
|
|
|
|
|
+ # self._oss.upload("file", attachment["fid"], stream, gzip=gzip)
|
|
|
|
+ self._bucket.put_object(attachment["fid"], stream)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
logger.error(
|
|
logger.error(
|
|
"[{}]上传失败,原因:{}".format(filename, e.__class__.__name__)
|
|
"[{}]上传失败,原因:{}".format(filename, e.__class__.__name__)
|
|
@@ -218,21 +237,6 @@ class AttachmentDownloader:
|
|
|
|
|
|
return attachment
|
|
return attachment
|
|
|
|
|
|
- def read_pdf_in_chunks(self, pdf_path, chunk_size=1024):
|
|
|
|
- try:
|
|
|
|
- with open(pdf_path, 'rb') as file:
|
|
|
|
- chunk = file.read(chunk_size)
|
|
|
|
- if "<</Names <</Dests 4 0 R>>" in str(chunk) and "SourceModified" in str(chunk):
|
|
|
|
- return False
|
|
|
|
- elif "doctypehtml" not in str(chunk):
|
|
|
|
- return True
|
|
|
|
- elif "%PDF" in str(chunk):
|
|
|
|
- return True
|
|
|
|
- else:
|
|
|
|
- return False
|
|
|
|
- except Exception as e:
|
|
|
|
- return False
|
|
|
|
-
|
|
|
|
def _push_oss_from_local(self, filename, filetype, url, **kwargs):
|
|
def _push_oss_from_local(self, filename, filetype, url, **kwargs):
|
|
"""
|
|
"""
|
|
上传本地文件到oss
|
|
上传本地文件到oss
|
|
@@ -241,6 +245,7 @@ class AttachmentDownloader:
|
|
:param str filetype: 文件类型
|
|
:param str filetype: 文件类型
|
|
:param str url: 文件下载地址
|
|
:param str url: 文件下载地址
|
|
"""
|
|
"""
|
|
|
|
+ gzip = kwargs.pop("gzip", False)
|
|
file = self.create_file(filename, filetype)
|
|
file = self.create_file(filename, filetype)
|
|
stream = self.fetch_data(url, file=file, **kwargs)
|
|
stream = self.fetch_data(url, file=file, **kwargs)
|
|
'''上传/下载,无论失败成功都需要返回文件基础信息'''
|
|
'''上传/下载,无论失败成功都需要返回文件基础信息'''
|
|
@@ -250,7 +255,7 @@ class AttachmentDownloader:
|
|
}
|
|
}
|
|
|
|
|
|
if kwargs.get('is_check', None):
|
|
if kwargs.get('is_check', None):
|
|
- if not self.read_pdf_in_chunks(file):
|
|
|
|
|
|
+ if not self.read_pdf_by_chunks(file):
|
|
self.remove(file)
|
|
self.remove(file)
|
|
return attachment
|
|
return attachment
|
|
|
|
|
|
@@ -261,7 +266,8 @@ class AttachmentDownloader:
|
|
attachment["size"] = self.getsize(file)
|
|
attachment["size"] = self.getsize(file)
|
|
attachment["ftype"] = filetype
|
|
attachment["ftype"] = filetype
|
|
attachment["url"] = "oss"
|
|
attachment["url"] = "oss"
|
|
- AliYunService().push_oss_from_local(attachment["fid"], file)
|
|
|
|
|
|
+ # self._oss.upload("file", attachment["fid"], stream, gzip=gzip)
|
|
|
|
+ self._bucket.put_object_from_file(attachment["fid"], file)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
logger.error(
|
|
logger.error(
|
|
"[{}]上传失败,原因:{}".format(filename, e.__class__.__name__)
|
|
"[{}]上传失败,原因:{}".format(filename, e.__class__.__name__)
|
|
@@ -270,27 +276,18 @@ class AttachmentDownloader:
|
|
self.remove(file) # 删除本地临时文件
|
|
self.remove(file) # 删除本地临时文件
|
|
return attachment
|
|
return attachment
|
|
|
|
|
|
- def fetch_attachment(
|
|
|
|
- self,
|
|
|
|
- file_name: str,
|
|
|
|
- file_type: str,
|
|
|
|
- download_url: str,
|
|
|
|
- mode="local",
|
|
|
|
- proxies=None,
|
|
|
|
- **kwargs
|
|
|
|
- ):
|
|
|
|
|
|
+ def fetch_attachment(self, file_name, file_type, download_url, mode="local", proxies=None, gzip=False, **kwargs):
|
|
"""
|
|
"""
|
|
下载附件
|
|
下载附件
|
|
|
|
|
|
- @param file_name: 文件名称
|
|
|
|
- @param file_type: 文件类型
|
|
|
|
- @param download_url: 文件下载地址
|
|
|
|
- @param mode: 附件上传模式 "local" = 本地文件 or "stream" = 数据流
|
|
|
|
- @param proxies: 代理 {"http":"http://xxx", "https":"https://xxx"}
|
|
|
|
- @return:
|
|
|
|
|
|
+ @param str file_name: 文件名称
|
|
|
|
+ @param str file_type: 文件类型
|
|
|
|
+ @param str download_url: 文件下载地址
|
|
|
|
+ @param str mode: 附件上传模式; "local" = 本地文件; "stream" = 数据流
|
|
|
|
+ @param bool gzip: 是否压缩
|
|
|
|
+ @param dict proxies: 代理 {"http":"http://xxx", "https":"https://xxx"}
|
|
|
|
+ @return: {"fid":"", "filename":"", "url":"oss", "size":"", "ftype":"", "org_url":""}
|
|
"""
|
|
"""
|
|
- if not file_name or not file_type or not download_url:
|
|
|
|
- raise AttachmentNullError
|
|
|
|
|
|
|
|
file_name = clear_file_type_suffix(file_name, file_type) # 防止文件后缀重复
|
|
file_name = clear_file_type_suffix(file_name, file_type) # 防止文件后缀重复
|
|
file_kwargs = dict(
|
|
file_kwargs = dict(
|
|
@@ -298,6 +295,7 @@ class AttachmentDownloader:
|
|
filetype=file_type,
|
|
filetype=file_type,
|
|
url=download_url,
|
|
url=download_url,
|
|
proxies=proxies,
|
|
proxies=proxies,
|
|
|
|
+ gzip=gzip,
|
|
**kwargs
|
|
**kwargs
|
|
)
|
|
)
|
|
if mode == "stream":
|
|
if mode == "stream":
|
|
@@ -305,3 +303,6 @@ class AttachmentDownloader:
|
|
else:
|
|
else:
|
|
attachment = self._push_oss_from_local(**file_kwargs)
|
|
attachment = self._push_oss_from_local(**file_kwargs)
|
|
return attachment
|
|
return attachment
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+AttachmentDownloader = Downloader
|