lizongze vor 2 Jahren
Ursprung
Commit
25b3652270

+ 2 - 0
FworkSpider/feapder/templates/detail_template.tmpl

@@ -87,6 +87,8 @@ class Details(feapder.BiddingDetailSpider):
                             file_type = file_url.split("?")[0].split(".")[-1].lower()
                             if file_type not in files_info.get("files_type"):
                                 file_type = file_name.split("?")[0].split(".")[-1].lower()
+                        else:
+                            file_type = files_info.get("file_type")
 
                         if file_type in files_info.get("files_type") and files_info.get("url_key") in file_url:
                             attachment = AttachmentDownloader().fetch_attachment(

+ 1 - 1
FworkSpider/untils/tools.py

@@ -339,7 +339,7 @@ def remove_htmldata(remove_info_list:list, html:str, response):
     """
         过滤详情页无效数据
     Args:
-        remove_info_list: 需删除内容的xpath或文本 -> list [xpath,re,str]
+        remove_info_list: 需删除内容的xpath或文本 -> list [xpath,re,str] eg:['<re>data:image/(.*?)"',]
         html: 待清洗文本
         response: 原文响应体