|
@@ -364,3 +364,28 @@ def remove_htmldata(remove_info_list:list, html:str, response):
|
|
|
if extra_html:
|
|
|
html = html.replace(extra_html, '')
|
|
|
return html
|
|
|
+
|
|
|
+
|
|
|
def extract_file_type(file_name="附件名", file_url="附件地址"):
    """
    Extract an attachment's file type (its lowercase extension, without the dot).

    The extension found in ``file_url`` takes precedence; ``file_name`` is
    only consulted when the URL does not end in a recognised extension.

    Args:
        file_name: attachment name, e.g. ``"report.docx"``
        file_url: attachment URL, e.g. ``"http://host/files/a.pdf?token=1"``

    Returns:
        The recognised file type as a lowercase string, or ``None`` when
        either argument is empty or neither of them yields a recognised type.
    """
    # Both pieces of information are required; an empty/None value means
    # there is nothing reliable to inspect.
    if not (file_name and file_url):
        return None

    known_types = (
        'zip', 'docx', 'ftp', 'pdf', 'doc', 'rar', 'gzzb', 'hzzbs',
        'jpg', 'png', 'zbid', 'xls', 'xlsx', 'swp', 'dwg',
    )

    # Drop the query string, take the text after the last dot, then discard
    # a trailing "#fragment" and stray whitespace so that URLs such as
    # "a.pdf#page=2" or "a.zip " are still recognised.
    url_type = (
        file_url.split('?')[0].split('.')[-1].split('#')[0].strip().lower()
    )
    if url_type in known_types:
        return url_type

    # Fall back to the attachment name when the URL carries no usable type
    # (e.g. a download endpoint like "/download?id=3").
    name_type = file_name.strip().split('?')[0].split('.')[-1].lower()
    if name_type in known_types:
        return name_type

    return None
|