浏览代码

爬虫维护

lizongze 8 月之前
父节点
当前提交
3b2acbd7ec

+ 29 - 21
中车购供应链电商协同平台/zcggyldsxtpt_details.py

@@ -72,32 +72,40 @@ class Details(feapder.BiddingDetailSpider):
             pm.update(get_pm())
             request.params = pm
 
-
     def detail_get(self, request, response):
 
         items = request.item
         list_item = DataBakItem(**items)
 
-        html = response.json.get('data').get('a2w9_content_tag') or response.json.get('data').get('a2w9_notice_content_tag')
-
-        list_item.contenthtml = html.replace('窗体底端','')
-
-        # 附件下载需登录 attachmentList
-        # file_list = response.json.get('data').get('a2w9_attach_files')
-        # if file_list:
-        #     attachments = {}
-        #     for info in file_list:
-        #         file_name = info.get('name')
-        #         file_url = info.get('downloadUrl')
-        #         file_type = info.get('type')
-        #
-        #         attachment = AttachmentDownloader().fetch_attachment(
-        #             file_name=file_name, file_type=file_type, download_url=file_url,
-        #             cookies=request.cookies)
-        #         attachments[str(len(attachments) + 1)] = attachment
-        #
-        #     if attachments:
-        #         list_item.projectinfo = {"attachments": attachments}
+        dt = response.json.get('data')
+        html = dt.get('a2w9_content_tag') or dt.get('a2w9_notice_content_tag')
+
+        # 附件下载需登录
+        attachments = {}
+        file_list = dt.get('a2w9_attach_files') or dt.get('attachmentList')
+        if file_list:
+            for info in file_list:
+                file_name = info.get('name')
+                file_url = info.get('downloadUrl')
+                file_type = info.get('type')
+
+                if "sso.crrcgo.cc/login" not in file_url:
+                    attachment = AttachmentDownloader().fetch_attachment(
+                        file_name=file_name, file_type=file_type, download_url=file_url)
+
+                    if attachment.__contains__("fid"):
+                        attachments[str(len(attachments) + 1)] = attachment
+
+        if attachments:
+            list_item.projectinfo = {"attachments": attachments}
+
+        if not html and attachments:
+            html = "详情请访问原网页!"
+        elif not html and not attachments:
+            '''无正文、无附件'''
+            return
+
+        list_item.contenthtml = html.replace('窗体底端', '')
 
         yield list_item
 

+ 2 - 2
易交易电子招标投标交易平台/政府采购-列表页.py

@@ -46,8 +46,8 @@ class Feapder(feapder.BiddingListSpider):
         self.site = "易交易电子招标投标交易平台"
 
         self.menus = [
-            Menu('招标信息-工程建设', 'a_yjydzzbtbjypt_zbxx_zfcg', 'A', 1),
-            Menu('招标信息-企业采购', 'a_yjydzzbtbjypt_zbxx_qycg', '', 1),
+            Menu('招标信息-工程建设', 'a_yjydzzbtbjypt_zbxx_gcjs', 'A', 1),
+            Menu('招标信息-企业采购', 'a_yjydzzbtbjypt_zbxx_zfcg', '', 1),
             Menu('土地矿业', 'a_yjydzzbtbjypt_tdky', 'B', 1),
             Menu('产权交易', 'a_yjydzzbtbjypt_cqjy', 'C', 1),
             # Menu('其他', 'a_yjydzzbtbjypt_qt', 'Z', 1),  # 暂无数据