dzr 3 viikkoa sitten
vanhempi
commit
b79c670dc1

+ 28 - 38
zj_zjstzxmzxspjgpt_pppzdtjxm_njpc/浙江省投资项目在线审批监管平台-列表页.py

@@ -6,39 +6,34 @@ Created on 2025-04-15
 ---------
 @author: lzz
 """
-import feapder
-from items.njpc_item import NjpcListItem
 from collections import namedtuple
 
+import feapder
+from items.njpc_item import NjpcListItem
 
 
-
-class Njpc_Feapder(feapder.PlanToBuildListSpider):
+class Spider(feapder.PlanToBuildListSpider):
 
     def start_callback(self):
         Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
-
         self.site = "浙江省投资项目在线审批监管平台"
-
         self.menus = [
-            Menu('审批和监管事项办理结果公示', 'zj_zjstzxmzxspjgpt_pppzdtjxm_njpc', 60),
+            Menu('审批和监管事项办理结果公示', 'zj_zjstzxmzxspjgpt_pppzdtjxm_njpc', 50),
         ]
-
         self.headers = {
             "Accept": "application/json, text/javascript, */*; q=0.01",
             "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
-            "Connection": "keep-alive",
             "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
             "Origin": "https://tzxm.zjzwfw.gov.cn",
             "Referer": "https://tzxm.zjzwfw.gov.cn/tzxmweb/zwtpages/resultsPublicity/notice_of_publicity_new.html?page=1",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
             "X-Requested-With": "XMLHttpRequest",
         }
 
     def start_requests(self):
-        for menu in self.menus:
-            start_url = "https://tzxm.zjzwfw.gov.cn/publicannouncement.do?method=itemList"
-            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)
+        url = "https://tzxm.zjzwfw.gov.cn/publicannouncement.do?method=itemList"
+        for menu, page in self.product():
+            yield feapder.Request(url, item=menu._asdict(), page=page)
 
     def download_midware(self, request):
         page = request.page
@@ -54,7 +49,6 @@ class Njpc_Feapder(feapder.PlanToBuildListSpider):
         request.headers = self.headers
 
     def parse(self, request, response):
-
         menu = request.item
         info_list = response.json[0].get('itemList')
         for info in info_list:
@@ -69,33 +63,29 @@ class Njpc_Feapder(feapder.PlanToBuildListSpider):
             city = ""       # 城市
             district = ""
 
-            data_item = NjpcListItem()  # 存储数据的管道
-            data_item.channel = menu.get("channel")  # 最上方定义的抓取栏目 (编辑器定的)
-            data_item.unique_key = ('href', publish_time)
-            data_item.spidercode = menu.get("code")  # 最上方定义的爬虫code(编辑器定的)
-            data_item.projectname = projectname      # 项目名称
-            data_item.publishtime = publish_time     # 发布时间
-            data_item.approvecode = deal_code
-            data_item.approvecontent = info.get('ITEM_NAME')
-
-            data_item.site = self.site
-            data_item.area = area or "全国"  # 城市默认:全国
-            data_item.city = city  # 城市 默认为空
-            data_item.district = district  # 城市 默认为空
-            data_item.parser_url = "https://tzxm.zjzwfw.gov.cn/publicannouncement.do?method=projectDetail" # 详情页数据链接
-            data_item.href = detail_href  # 详情链接
-            data_item.parser = "detail_get"
+            list_item = NjpcListItem()  # list item populated below and yielded
+            list_item.channel = menu.get("channel")  # channel defined in the menu above (assigned by editors)
+            list_item.unique_key = ('href', publish_time)
+            list_item.spidercode = menu.get("code")  # spider code defined in the menu above (assigned by editors)
+            list_item.projectname = projectname      # project name
+            list_item.publishtime = publish_time     # publish time
+            list_item.approvecode = deal_code
+            list_item.approvecontent = info.get('ITEM_NAME')
+
+            list_item.site = self.site
+            list_item.area = area or "全国"  # area; falls back to "全国" (nationwide) when empty
+            list_item.city = city  # city; empty by default
+            list_item.district = district  # district; empty by default
+            list_item.parser_url = "https://tzxm.zjzwfw.gov.cn/publicannouncement.do?method=projectDetail" # URL the detail data is requested from
+            list_item.href = detail_href  # detail page link
+            list_item.parser = "detail_get"
             data = {
                 "projectuuid": pUid
             }
-            data_item.request_params = {"data":data}
-            data_item.is_check_spider = False
-
-            yield data_item
+            list_item.request_params = {"data": data}
+            list_item.is_check_spider = False
+            yield list_item
 
-        # 翻页
-        request = self.infinite_pages(request, response)
-        yield request
 
 if __name__ == "__main__":
-    Njpc_Feapder(redis_key="lzz:zjstzxmzxspjgpt_pppzdtjxm").start()
+    Spider(redis_key="lzz:zjstzxmzxspjgpt_pppzdtjxm").start()

+ 14 - 10
zj_zjstzxmzxspjgpt_pppzdtjxm_njpc/浙江省投资项目在线审批监管平台-详情页.py

@@ -6,13 +6,14 @@ Created on 2025-04-15
 ---------
 @author: lzz
 """
+import random
 import time
+
 import feapder
+import requests
+from feapder.network.selector import Selector
 from items.njpc_item import DataNjpcItem
 from untils.attachment import AttachmentDownloader
-from feapder.network.selector import Selector
-import requests
-import random
 from untils.get_imgcode import get_code
 from untils.tools import get_proxy
 
@@ -79,7 +80,7 @@ headers = {
 }
 
 
-class Details(feapder.PlanToBuildDetailSpider):
+class Spider(feapder.PlanToBuildDetailSpider):
     proxy = get_proxy()
 
     def start_requests(self):
@@ -220,11 +221,14 @@ class Details(feapder.PlanToBuildDetailSpider):
                     if file_type:
                         file_url = file_url + f"&Txtidcode={code}"
                         attachment = AttachmentDownloader().fetch_attachment(
-                            file_name=file_name, file_type=file_type, download_url=file_url, cookies=cks)
-                        if attachment.__contains__('fid'):
-                            attachments[str(len(attachments) + 1)] = attachment
-
-            if attachments:
+                            file_name=file_name,
+                            file_type=file_type,
+                            download_url=file_url,
+                            cookies=cks
+                        )
+                        attachments[str(len(attachments) + 1)] = attachment
+
+            if len(attachments) > 0:
                 data_item.projectinfo = {"attachments": attachments}
 
             yield data_item
@@ -232,4 +236,4 @@ class Details(feapder.PlanToBuildDetailSpider):
 
 
 if __name__ == '__main__':
-    Details(redis_key="lzz:zjstzxmzxspjgpt_pppzdtjxm").start()
+    Spider(redis_key="lzz:zjstzxmzxspjgpt_pppzdtjxm").start()