123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136 |
# -*- coding: utf-8 -*-
"""
Created on 2025-03-19
---------
@summary: IBK企业银行 (IBK corporate bank, www.ibkcn.com) list spider
---------
@author: lzz
"""
- import feapder
- from items.spider_item import BidingListItem
- from collections import namedtuple
- import json
- import re
- import random
- import requests
- import execjs
def get_params():
    """Fetch the session cookies and the ten Vest tokens from ``VestSubmit.jsp``.

    One GET request is sent to ``https://www.ibkcn.com/adm/VestSubmit.jsp``.
    The response body is scanned for assignments of the form
    ``_<name>_ = '<value>'`` for each of the ten token names below.

    Returns:
        tuple: ``(cookies, s4512, s2110, s3186, s7174, s1958, s6907, s9232,
        s0192, s1234, s5329)``. ``cookies`` is the dict of cookies set by the
        response; every token is a ``str``. A token that is not found in the
        body is returned as ``""``; if a pattern matches more than once, all
        captured values are concatenated.

    Raises:
        Any exception raised by ``requests.get`` (connection error, timeout
        after 20 seconds, ...) propagates to the caller.
    """
    # Order matters: callers unpack the returned tuple positionally.
    token_names = (
        "4512", "2110", "3186", "7174", "1958",
        "6907", "9232", "0192", "1234", "5329",
    )
    headers = {
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Connection": "keep-alive",
        "Referer": "https://www.ibkcn.com/iview/01/NWIBBIF0701",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
    }
    url = "https://www.ibkcn.com/adm/VestSubmit.jsp"
    # A fresh random value is sent as "v" on every call
    # (presumably a cache-buster -- confirm against the site).
    params = {
        "v": f"{random.random()}"
    }
    res = requests.get(url, headers=headers, params=params, timeout=20)
    cookies = res.cookies.get_dict()

    # Read the body once instead of once per token.
    body = res.text
    tokens = tuple(
        "".join(re.findall(f"_{name}_ = '(.*?)'", body))
        for name in token_names
    )
    return (cookies, *tokens)
def ctx():
    """Compile the local ``ibkqyyx.js`` script and return the execjs context.

    The script is looked up relative to the current working directory and is
    read with the platform's default text encoding. Nothing is cached: the
    file is re-read and re-compiled on every call.

    Returns:
        The object returned by ``execjs.compile`` for the script source.
    """
    with open('ibkqyyx.js', 'r') as script_file:
        return execjs.compile(script_file.read())
class ZtbpcFeapder(feapder.BiddingListSpider):
    """List spider for the "新闻及公告" (news and announcements) channel of IBK企业银行.

    Every list request body is produced by the ``VestAjaxJson`` function of
    the local JS script, and every response is passed through the same
    function before being parsed as JSON.
    """

    def start_callback(self):
        """Set up site name, menus, headers and the Vest session values."""
        self.site = "IBK企业银行"

        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
        self.menus = [
            Menu('新闻及公告', 'a_ibkqyyx_xwjgg', 1),
        ]

        self.headers = {
            "Accept": "application/json",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Connection": "keep-alive",
            "Content-Type": "application/json; charset=\"UTF-8\"",
            "Origin": "https://www.ibkcn.com",
            "Referer": "https://www.ibkcn.com/iview/01/NWIBBIF0701",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
            "submissionid": "mf_subCont_sbmblbrInq"
        }

        # NOTE(review): wa_ lives on the spider instance, so parse() reads
        # whatever the most recent download_midware() call stored -- confirm
        # this is safe if requests are ever processed concurrently.
        self.wa_ = ""

        # Cookies and the ten tokens are fetched once, before crawling starts.
        (
            self.cookies,
            self.s4512,
            self.s2110,
            self.s3186,
            self.s7174,
            self.s1958,
            self.s6907,
            self.s9232,
            self.s0192,
            self.s1234,
            self.s5329,
        ) = get_params()

    def _vest_tokens(self):
        """Return the ten Vest tokens in the order ``VestAjaxJson`` receives them."""
        return (
            self.s4512,
            self.s2110,
            self.s3186,
            self.s7174,
            self.s1958,
            self.s6907,
            self.s9232,
            self.s0192,
            self.s1234,
            self.s5329,
        )

    def start_requests(self):
        """Yield one first-page list request per configured menu."""
        list_url = "https://www.ibkcn.com/non/blbrInq"
        for menu in self.menus:
            yield feapder.Request(url=list_url, item=menu._asdict(), page=1, proxies=False)

    def download_midware(self, request):
        """Attach the Vest-processed body, headers and cookies to ``request``.

        Also stores the ``firstwa_`` value of the JS result in ``self.wa_``
        for later use by ``parse``.
        """
        # NOTE(review): the page number is sent as "srchId" while "page" is
        # always "1" and "totalCnt" is fixed at 527 -- confirm this is what
        # the site expects.
        plain_body = (
            '{"dcBlbrInptMap":{"srchId":"'
            + str(request.page)
            + '","srchWord":"","page":"1","totalCnt":527,"rowCnt":10,"blbrDcd":"0002"}}'
        )
        vest_result = ctx().call('VestAjaxJson', plain_body, *self._vest_tokens())
        enc_data = vest_result.get('encData')
        self.wa_ = vest_result.get('firstwa_')

        # Round-trip through json to re-serialise without any whitespace.
        request.data = json.dumps(json.loads(enc_data), separators=(',', ':'))
        request.headers = self.headers
        request.cookies = self.cookies

    def parse(self, request, response):
        """Yield one ``BidingListItem`` per list record, then the pagination result."""
        menu = request.item

        # Same JS entry point as in download_midware, with self.wa_ appended
        # as the last argument; the result is JSON text.
        decoded_text = ctx().call(
            'VestAjaxJson', response.text, *self._vest_tokens(), self.wa_
        )
        records = json.loads(decoded_text).get('dcBlbrOtptList')

        for record in records:
            title = record.get('blbrCon').strip()
            href = f"https://www.ibkcn.com/iview/01/NWIBBIF0701?id={record.get('dsncSqn')}"

            # 'frrgTrnYmd' is sliced as YYYYMMDD and re-joined as YYYY-MM-DD.
            raw_date = record.get('frrgTrnYmd')
            publish_time = "-".join((raw_date[:4], raw_date[4:6], raw_date[6:8]))

            area = "全国"
            city = ""

            list_item = BidingListItem()                # item sent to the storage pipeline
            list_item.href = href                       # link of the announcement
            list_item.channel = menu.get("channel")     # channel defined in self.menus
            list_item.spidercode = menu.get("code")     # spider code defined in self.menus
            list_item.title = title                     # title
            list_item.publishtime = publish_time        # publication date
            list_item.site = self.site
            list_item.area = area or "全国"              # province, default "全国"
            list_item.city = city                       # city, default empty
            list_item.unique_key = ('href','title')
            list_item.parse = "self.detail_get"         # name of the detail-page callback
            list_item.deal_detail = []
            list_item.info = record                     # raw record kept on the item
            list_item.parse_url = "https://www.ibkcn.com/non/blbrDtlInq"
            yield list_item

        # infinite_pages is inherited and not shown here; presumably it
        # returns the next-page request -- confirm against the base class.
        yield self.infinite_pages(request, response)
if __name__ == "__main__":
    # Script entry point: build the spider with its redis key and start it.
    spider = ZtbpcFeapder(redis_key="lzz:ibkqyyx_xwjgg")
    spider.start()
|