|
@@ -0,0 +1,140 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+"""
|
|
|
+Created on 2024-01-04
|
|
|
+---------
|
|
|
+@summary: 广东省公共资源交易平台
|
|
|
+---------
|
|
|
+@author: lzz
|
|
|
+"""
|
|
|
+import feapder
|
|
|
+from items.spider_item import MgpListItem
|
|
|
+from collections import namedtuple
|
|
|
+from gd_utils import *
|
|
|
+import json
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+class Feapder(feapder.BiddingListSpider):
|
|
|
+
|
|
|
+ def start_callback(self):
|
|
|
+ Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
|
|
|
+
|
|
|
+ self.site = "广东省公共资源交易平台"
|
|
|
+
|
|
|
+ self.menus = [
|
|
|
+ Menu('土地矿业-矿业权交易公告', 'a_gdsggzyjypt_tdky_kyqjygg', 1),
|
|
|
+ ]
|
|
|
+
|
|
|
+
|
|
|
+ def start_requests(self):
|
|
|
+ for menu in self.menus:
|
|
|
+ start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
|
|
|
+ yield feapder.Request(url=start_url, item=menu._asdict(), page=1)
|
|
|
+
|
|
|
+ def download_midware(self, request):
|
|
|
+ page = request.page
|
|
|
+ data = {
|
|
|
+ "type": "trading-type",
|
|
|
+ "openConvert": False,
|
|
|
+ "keyword": "",
|
|
|
+ "siteCode": "44",
|
|
|
+ "secondType": "B",
|
|
|
+ "tradingProcess": "650,651,654,655,656,657,658,659,660,2A11,2A1C,3A21,3A31,3A61,3A71,3A81,3A91,3AA1,3AB1,3AC1",
|
|
|
+ "thirdType": "[]",
|
|
|
+ "projectType": "",
|
|
|
+ "publishStartTime": "",
|
|
|
+ "publishEndTime": "",
|
|
|
+ "pageNo": page,
|
|
|
+ "pageSize": 10
|
|
|
+ }
|
|
|
+ en_str = get_enstr(data)
|
|
|
+ data = json.dumps(data, separators=(',', ':'))
|
|
|
+ request.data = data
|
|
|
+ request.headers = {
|
|
|
+ "Accept": "application/json, text/plain, */*",
|
|
|
+ "Accept-Language": "zh-CN,zh;q=0.9",
|
|
|
+ "Cache-Control": "no-cache",
|
|
|
+ "Connection": "keep-alive",
|
|
|
+ "Content-Type": "application/json",
|
|
|
+ "Origin": "https://ygp.gdzwfw.gov.cn",
|
|
|
+ "Pragma": "no-cache",
|
|
|
+ "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
|
|
|
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
|
|
|
+ "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
|
|
|
+ "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
|
|
|
+ "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
|
|
|
+ "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ def parse(self, request, response):
|
|
|
+
|
|
|
+ menu = request.item
|
|
|
+ info_list = response.json.get('data').get('pageData')
|
|
|
+ for info in info_list:
|
|
|
+ noticeSecondType = info.get('noticeSecondType')
|
|
|
+ edition = info.get('edition')
|
|
|
+ noticeId = info.get('noticeId')
|
|
|
+ projectCode = info.get('projectCode')
|
|
|
+ tradingProcess = info.get('tradingProcess')
|
|
|
+ siteCode = info.get('regionCode')
|
|
|
+ publishDate = info.get('publishDate')
|
|
|
+
|
|
|
+ params = {
|
|
|
+ "siteCode": f"{siteCode}",
|
|
|
+ "tradingType": f"{noticeSecondType}",
|
|
|
+ "bizCode": f"{tradingProcess}",
|
|
|
+ "projectCode": f"{projectCode}"
|
|
|
+ }
|
|
|
+
|
|
|
+ nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
|
|
|
+ info['nodeId'] = nodeId
|
|
|
+
|
|
|
+ href = create_href(info)
|
|
|
+ title = info.get('noticeTitle').strip()
|
|
|
+ create_time = deal_time(publishDate)
|
|
|
+ regionName = info.get('regionName','').strip()
|
|
|
+
|
|
|
+ area = "广东"
|
|
|
+ city = regionName
|
|
|
+
|
|
|
+ list_item = MgpListItem() # 存储数据的管道
|
|
|
+ list_item.href = href # 标书链接
|
|
|
+ list_item.unique_key = ('href',)
|
|
|
+ list_item.channel = menu.get("channel") # 最上方定义的抓取栏目 (编辑器定的)
|
|
|
+ list_item.spidercode = menu.get("code") # 最上方定义的爬虫code(编辑器定的)
|
|
|
+ list_item.title = title # 标题
|
|
|
+ list_item.site = self.site
|
|
|
+ list_item.publishtime = create_time
|
|
|
+ list_item.area = area # 城市默认:全国
|
|
|
+ list_item.city = city # 城市 默认为空
|
|
|
+
|
|
|
+ list_item.parse = "self.detail_get" # 详情页回调方法
|
|
|
+
|
|
|
+ dparams = {
|
|
|
+ "nodeId": f"{nodeId}",
|
|
|
+ "version": f"{edition}",
|
|
|
+ "tradingType": f"{noticeSecondType}",
|
|
|
+ "noticeId": f"{noticeId}",
|
|
|
+ "bizCode": f"{tradingProcess}",
|
|
|
+ "projectCode": f"{projectCode}",
|
|
|
+ "siteCode": f"{siteCode}"
|
|
|
+ }
|
|
|
+ list_item.request_params = {"params":dparams}
|
|
|
+ list_item.deal_detail = [] # 抽取正文xpath
|
|
|
+ list_item.proxies = False
|
|
|
+ list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"
|
|
|
+
|
|
|
+
|
|
|
+ yield list_item
|
|
|
+
|
|
|
+ # 无限翻页
|
|
|
+ request = self.infinite_pages(request, response)
|
|
|
+ yield request
|
|
|
+
|
|
|
+
|
|
|
+if __name__ == "__main__":
|
|
|
+ Feapder(redis_key="lzz:gdsggzyjypt_tdky_kyqjygg", user="1127").start()
|
|
|
+
|