|
@@ -0,0 +1,118 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+"""
|
|
|
+Created on 2024-12-05
|
|
|
+---------
|
|
|
+@summary: 中国邮政储蓄银行邮银易采
|
|
|
+---------
|
|
|
+@author: lzz
|
|
|
+"""
|
|
|
+import feapder
|
|
|
+from items.spider_item import BidingListItem
|
|
|
+from collections import namedtuple
|
|
|
+from untils.WebCookiePool import WebCookiePool
|
|
|
+
|
|
|
+
|
|
|
class ZtbpcFeapder(feapder.BiddingListSpider):
    """List-page spider for the Postal Savings Bank of China procurement portal.

    For every configured channel the spider POSTs a JSON query to the
    ``queryContentPage`` endpoint on cg.psbc.com and converts each returned row
    into a ``BidingListItem`` whose ``parse_url`` points at the matching
    ``queryContentInfo`` detail endpoint.
    """

    def start_callback(self):
        """Initialise site name, channel menus, request headers and cookie pool."""
        self.site = "中国邮政储蓄银行邮银易采"

        # channel: display name, code: spider code, cid: categoryId sent to the
        # list API, crawl_page: carried along in the request item.
        Menu = namedtuple('Menu', ['channel', 'code', 'cid', 'crawl_page'])

        self.menus = [
            Menu('非招标信息-采购公告', 'a_zgyzcxyhyyyc_fzbxx_cggg', '229', 1),
            Menu('非招标信息-成交结果公示', 'a_zgyzcxyhyyyc_fzbxx_cjjggs', '230', 1),
            Menu('非招标信息-成交结果公告', 'a_zgyzcxyhyyyc_fzbxx_cjjggg', '231', 1),
            Menu('非招标信息-变更公告', 'a_zgyzcxyhyyyc_fzbxx_bggg', '232', 1),
            Menu('非招标信息-其他公告', 'a_zgyzcxyhyyyc_fzbxx_qtgg', '233', 1),
            Menu('非招标信息-单一来源采前公示', 'a_zgyzcxyhyyyc_fzbxx_dylycqgs', '236', 1),
        ]

        self.headers = {
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json; charset=UTF-8",
            "Origin": "https://cg.psbc.com",
            "Pragma": "no-cache",
            "Referer": "https://cg.psbc.com/cms/default/webfile/2ywgg1/index.html",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
            "X-Requested-With": "XMLHttpRequest",
        }

        self.cookie_pool = WebCookiePool(
            redis_key="zgyzcxyhyyyc_zbxx",
            page_url="https://cg.psbc.com/cms/default/webfile/1ywgg2/index.html",
            cookie_key="V3iEwBUtWULVP",
            driver_type="FIREFOX",
        )

        # Number of consecutive non-200 list responses seen by parse().
        self.ct = 0

    def start_requests(self):
        """Yield the first-page list request for every configured channel."""
        list_api = "https://cg.psbc.com/cms/api/dynamicData/queryContentPage"
        for menu in self.menus:
            yield feapder.Request(url=list_api, item=menu._asdict(), page=1, proxies=False)

    def download_midware(self, request):
        """Attach the JSON query body, a pooled cookie and the headers to ``request``.

        The request is modified in place; nothing is returned.
        """
        menu = request.item
        payload = {
            "pageNo": request.page,
            "pageSize": "10",
            "dto": {
                "siteId": "725",
                "categoryId": menu.get('cid'),
                "city": "",
                "county": "",
                "purchaseMode": "",
                "secondCompanyId": ""
            }
        }
        request.cookies = self.cookie_pool.get_cookie()
        request.json = payload
        request.headers = self.headers

    def parse(self, request, response):
        """Turn one page of the list API response into ``BidingListItem`` objects.

        A non-200 response drops the cookie the request was sent with and
        re-yields the request. Once more than five consecutive failures have
        been counted, every later call returns without yielding anything.
        """
        if self.ct > 5:
            return

        if response.status_code != 200:
            self.ct += 1
            # Remove the cookie this request actually used. Asking the pool for
            # a cookie here may return a different entry than the failing one
            # (NOTE(review): feapder's stock pool rotates on get_cookie --
            # confirm WebCookiePool does the same). Fall back to the previous
            # behaviour when the request carries no cookie.
            used_cookie = getattr(request, "cookies", None) or self.cookie_pool.get_cookie()
            self.cookie_pool.del_cookie(used_cookie)
            yield request
            return

        self.ct = 0
        menu = request.item
        info_list = response.json.get('res').get('rows')
        for info in info_list:
            title = info.get('title').strip()
            hid = info.get('url')
            href = f"https://cg.psbc.com/cms/default/webfile{hid}"
            # "2024-12-05T10:00:00.000" -> "2024-12-05 10:00:00"
            publish_time = info.get('publishDate').replace('T', ' ').split('.')[0]

            # Content id = file name of the page URL without its extension.
            did = href.split('/')[-1].split('.')[0]

            list_item = BidingListItem()  # item handed to the storage pipeline
            list_item.href = href  # link to the announcement page
            list_item.channel = menu.get("channel")  # channel name from self.menus
            list_item.spidercode = menu.get("code")  # spider code from self.menus
            list_item.title = title  # announcement title
            list_item.publishtime = publish_time  # publication time
            list_item.site = self.site
            list_item.area = "全国"  # province; this site is always nationwide
            list_item.city = ""  # city; left empty

            list_item.unique_key = ('href',)
            list_item.parse = "self.detail_get"  # name of the detail-page callback
            list_item.parse_url = f"https://cg.psbc.com/cms/api/dynamicData/queryContentInfo?contentId={did}"

            yield list_item

        # Inherited helper; presumably builds the follow-up page request.
        yield self.infinite_pages(request, response)
|
|
|
+
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Script entry point: build the spider with its redis key, then run it.
    spider = ZtbpcFeapder(redis_key="lzz:zgyzcxyhyyyc_zbxx_zbgg")
    spider.start()
|