3 viikkoa sitten · b79c670dc1
--- a/zj_zjstzxmzxspjgpt_pppzdtjxm_njpc/浙江省投资项目在线审批监管平台-列表页.py
+++ b/zj_zjstzxmzxspjgpt_pppzdtjxm_njpc/浙江省投资项目在线审批监管平台-列表页.py
@@ -6,39 +6,34 @@ Created on 2025-04-15
 
				 ---------
			
 
				 @author: lzz
			
 
				 """
			
 
				-import feapder
			
 
				-from items.njpc_item import NjpcListItem
			
 
				 from collections import namedtuple
			
 
				 
			
 
				+import feapder
			
 
				+from items.njpc_item import NjpcListItem
			
 
				 
			
 
				 
			
 
				-
			
 
				-class Njpc_Feapder(feapder.PlanToBuildListSpider):
			
 
				+class Spider(feapder.PlanToBuildListSpider):
			
 
				 
			
 
				     def start_callback(self):
			
 
				         Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
			
 
				-
			
 
				         self.site = "浙江省投资项目在线审批监管平台"
			
 
				-
			
 
				         self.menus = [
			
 
				-            Menu('审批和监管事项办理结果公示', 'zj_zjstzxmzxspjgpt_pppzdtjxm_njpc', 60),
			
 
				+            Menu('审批和监管事项办理结果公示', 'zj_zjstzxmzxspjgpt_pppzdtjxm_njpc', 50),
			
 
				         ]
			
 
				-
			
 
				         self.headers = {
			
 
				             "Accept": "application/json, text/javascript, */*; q=0.01",
			
 
				             "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
			
 
				-            "Connection": "keep-alive",
			
 
				             "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
			
 
				             "Origin": "https://tzxm.zjzwfw.gov.cn",
			
 
				             "Referer": "https://tzxm.zjzwfw.gov.cn/tzxmweb/zwtpages/resultsPublicity/notice_of_publicity_new.html?page=1",
			
 
				-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
			
 
				+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
			
 
				             "X-Requested-With": "XMLHttpRequest",
			
 
				         }
			
 
				 
			
 
				     def start_requests(self):
			
 
				-        for menu in self.menus:
			
 
				-            start_url = "https://tzxm.zjzwfw.gov.cn/publicannouncement.do?method=itemList"
			
 
				-            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)
			
 
				+        url = "https://tzxm.zjzwfw.gov.cn/publicannouncement.do?method=itemList"
			
 
				+        for menu, page in self.product():
			
 
				+            yield feapder.Request(url, item=menu._asdict(), page=page)
			
 
				 
			
 
				     def download_midware(self, request):
			
 
				         page = request.page
			
@@ -54,7 +49,6 @@ class Njpc_Feapder(feapder.PlanToBuildListSpider):
 
				         request.headers = self.headers
			
 
				 
			
 
				     def parse(self, request, response):
			
 
				-
			
 
				         menu = request.item
			
 
				         info_list = response.json[0].get('itemList')
			
 
				         for info in info_list:
			
@@ -69,33 +63,29 @@ class Njpc_Feapder(feapder.PlanToBuildListSpider):
 
				             city = ""       # 城市
			
 
				             district = ""
			
 
				 
			
 
				-            data_item = NjpcListItem()  # 存储数据的管道
			
 
				-            data_item.channel = menu.get("channel")  # 最上方定义的抓取栏目 （编辑器定的）
			
 
				-            data_item.unique_key = ('href', publish_time)
			
 
				-            data_item.spidercode = menu.get("code")  # 最上方定义的爬虫code（编辑器定的）
			
 
				-            data_item.projectname = projectname      # 项目名称
			
 
				-            data_item.publishtime = publish_time     # 发布时间
			
 
				-            data_item.approvecode = deal_code
			
 
				-            data_item.approvecontent = info.get('ITEM_NAME')
			
 
				-
			
 
				-            data_item.site = self.site
			
 
				-            data_item.area = area or "全国"  # 城市默认:全国
			
 
				-            data_item.city = city  # 城市 默认为空
			
 
				-            data_item.district = district  # 城市 默认为空
			
 
				-            data_item.parser_url = "https://tzxm.zjzwfw.gov.cn/publicannouncement.do?method=projectDetail" # 详情页数据链接
			
 
				-            data_item.href = detail_href  # 详情链接
			
 
				-            data_item.parser = "detail_get"
			
 
				+            list_item = NjpcListItem()  # 存储数据的管道
			
 
				+            list_item.channel = menu.get("channel")  # 最上方定义的抓取栏目 （编辑器定的）
			
 
				+            list_item.unique_key = ('href', publish_time)
			
 
				+            list_item.spidercode = menu.get("code")  # 最上方定义的爬虫code（编辑器定的）
			
 
				+            list_item.projectname = projectname      # 项目名称
			
 
				+            list_item.publishtime = publish_time     # 发布时间
			
 
				+            list_item.approvecode = deal_code
			
 
				+            list_item.approvecontent = info.get('ITEM_NAME')
			
 
				+
			
 
				+            list_item.site = self.site
			
 
				+            list_item.area = area or "全国"  # 城市默认:全国
			
 
				+            list_item.city = city  # 城市 默认为空
			
 
				+            list_item.district = district  # 城市 默认为空
			
 
				+            list_item.parser_url = "https://tzxm.zjzwfw.gov.cn/publicannouncement.do?method=projectDetail" # 详情页数据链接
			
 
				+            list_item.href = detail_href  # 详情链接
			
 
				+            list_item.parser = "detail_get"
			
 
				             data = {
			
 
				                 "projectuuid": pUid
			
 
				             }
			
 
				-            data_item.request_params = {"data":data}
			
 
				-            data_item.is_check_spider = False
			
 
				-
			
 
				-            yield data_item
			
 
				+            list_item.request_params = {"data": data}
			
 
				+            list_item.is_check_spider = False
			
 
				+            yield list_item
			
 
				 
			
 
				-        # 翻页
			
 
				-        request = self.infinite_pages(request, response)
			
 
				-        yield request
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				-    Njpc_Feapder(redis_key="lzz:zjstzxmzxspjgpt_pppzdtjxm").start()
			
 
				+    Spider(redis_key="lzz:zjstzxmzxspjgpt_pppzdtjxm").start()
			
--- a/zj_zjstzxmzxspjgpt_pppzdtjxm_njpc/浙江省投资项目在线审批监管平台-详情页.py
+++ b/zj_zjstzxmzxspjgpt_pppzdtjxm_njpc/浙江省投资项目在线审批监管平台-详情页.py
@@ -6,13 +6,14 @@ Created on 2025-04-15
 
				 ---------
			
 
				 @author: lzz
			
 
				 """
			
 
				+import random
			
 
				 import time
			
 
				+
			
 
				 import feapder
			
 
				+import requests
			
 
				+from feapder.network.selector import Selector
			
 
				 from items.njpc_item import DataNjpcItem
			
 
				 from untils.attachment import AttachmentDownloader
			
 
				-from feapder.network.selector import Selector
			
 
				-import requests
			
 
				-import random
			
 
				 from untils.get_imgcode import get_code
			
 
				 from untils.tools import get_proxy
			
 
				 
			
@@ -79,7 +80,7 @@ headers = {
 
				 }
			
 
				 
			
 
				 
			
 
				-class Details(feapder.PlanToBuildDetailSpider):
			
 
				+class Spider(feapder.PlanToBuildDetailSpider):
			
 
				     proxy = get_proxy()
			
 
				 
			
 
				     def start_requests(self):
			
@@ -220,11 +221,14 @@ class Details(feapder.PlanToBuildDetailSpider):
 
				                     if file_type:
			
 
				                         file_url = file_url + f"&Txtidcode={code}"
			
 
				                         attachment = AttachmentDownloader().fetch_attachment(
			
 
				-                            file_name=file_name, file_type=file_type, download_url=file_url, cookies=cks)
			
 
				-                        if attachment.__contains__('fid'):
			
 
				-                            attachments[str(len(attachments) + 1)] = attachment
			
 
				-
			
 
				-            if attachments:
			
 
				+                            file_name=file_name,
			
 
				+                            file_type=file_type,
			
 
				+                            download_url=file_url,
			
 
				+                            cookies=cks
			
 
				+                        )
			
 
				+                        attachments[str(len(attachments) + 1)] = attachment
			
 
				+
			
 
				+            if len(attachments) > 0:
			
 
				                 data_item.projectinfo = {"attachments": attachments}
			
 
				 
			
 
				             yield data_item
			
@@ -232,4 +236,4 @@ class Details(feapder.PlanToBuildDetailSpider):
 
				 
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				-    Details(redis_key="lzz:zjstzxmzxspjgpt_pppzdtjxm").start()
			
 
				+    Spider(redis_key="lzz:zjstzxmzxspjgpt_pppzdtjxm").start()