@@ -87,6 +87,8 @@ class Details(feapder.BiddingDetailSpider):
file_type = file_url.split("?")[0].split(".")[-1].lower()
if file_type not in files_info.get("files_type"):
file_type = file_name.split("?")[0].split(".")[-1].lower()
+ else:
+ file_type = files_info.get("file_type")
if file_type in files_info.get("files_type") and files_info.get("url_key") in file_url:
attachment = AttachmentDownloader().fetch_attachment(
@@ -339,7 +339,7 @@ def remove_htmldata(remove_info_list:list, html:str, response):
"""
过滤详情页无效数据
Args:
- remove_info_list: 需删除内容的xpath或文本 -> list [xpath,re,str]
+ remove_info_list: 需删除内容的xpath或文本 -> list [xpath,re,str] eg:['<re>data:image/(.*?)"',]
html: 待清洗文本
response: 原文响应体