12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- # -*- coding: utf-8 -*-
- """
- Created on 2025-04-25
- ---------
- @summary: 国铁采购平台
- ---------
- @author: lzz
- """
- import re
- import feapder
- import feapder.utils.tools as tools
- from items.spider_item import DataBakItem
- from untils.tools import get_proxy
- from fingerprint import get_fingerprint, fetch_alteon_pcgmh, check_fingerprint
class Spider(feapder.BiddingDetailSpider):
    """Detail-page spider for the procurement site served at cg.95306.cn.

    The spider keeps three pieces of shared session state on the instance:
    ``alteon_pcgmh`` (value of the ``AlteonPcgmh`` cookie), ``cookies`` (the
    cookie dict sent with every request) and ``proxy``. They are filled in
    lazily by ``download_midware`` and discarded by ``exception_request``.
    """

    def start_callback(self):
        """Initialise the shared session state and pick a proxy."""
        self.alteon_pcgmh = None
        self.cookies = None
        self.proxy = get_proxy()

    def start_requests(self):
        """Yield one ``feapder.Request`` per task fetched from the queue.

        Each task dict supplies ``parse_url``, ``deal_detail`` and an optional
        ``request_params`` mapping whose entries are forwarded to the request
        as keyword arguments.
        """
        data_list = self.get_tasks_by_rabbitmq(limit=100)
        for item in data_list:
            # A task lacking "request_params" must not abort the whole batch:
            # unpacking None with ** raises TypeError inside this generator.
            request_params = item.get("request_params") or {}
            yield feapder.Request(url=item.get("parse_url"),
                                  proxies=False,
                                  item=item,
                                  deal_detail=item.get("deal_detail"),
                                  **request_params)

    def download_midware(self, request):
        """Attach headers, proxy and session cookies to an outgoing request.

        The ``AlteonPcgmh`` value and the cookie dict are fetched/built only
        when they are currently ``None`` and are then reused for later
        requests.
        """
        headers = {
            'Accept': '*/*',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Cache-Control': 'no-cache',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Origin': 'https://cg.95306.cn',
            'Referer': request.item.get('href'),
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
            'X-Requested-With': 'XMLHttpRequest',
        }
        if self.alteon_pcgmh is None:
            self.alteon_pcgmh = fetch_alteon_pcgmh(self.proxy)

        if self.cookies is None:
            # NOTE(review): the cached cookie keeps the mhId of whichever
            # request rebuilt it; later requests with a different mhId in
            # their params reuse this dict as-is. Confirm this is intended.
            self.cookies = {
                'AlteonPcgmh': self.alteon_pcgmh,
                'mhId': request.params['mhId'],
            }

        request.headers = headers
        request.proxies = self.proxy
        request.cookies = self.cookies

    def validate(self, request, response):
        """Choose the callback for a response based on its JSON payload.

        * ``code == '0-0203'``: run ``check_fingerprint`` and route the
          request to ``request_retry`` so it is issued again.
        * payload has truthy ``data``: route to the parser method named by
          ``request.item['parse']``.
        * otherwise: raise ``ValueError``.

        Returns:
            ``True`` for the first two cases.

        Raises:
            ValueError: the payload carries no ``data``.
        """
        payload = response.json
        # .get() so a payload without "code" falls through to the data check
        # instead of failing with KeyError.
        if payload.get('code') == '0-0203':
            referer = request.item.get('href')
            check_fingerprint(request.params['mhId'], self.cookies, referer, self.proxy)
            request.callback = self.request_retry
            return True
        elif payload.get('data'):
            request.callback = tools.resolve_method(self, request.item['parse'])
            return True
        else:
            raise ValueError('数据不能为空!')

    def request_retry(self, request, response):
        """Re-yield the same request so that it is downloaded again.

        Raises:
            AttributeError: ``request.item`` has no ``"parse"`` entry.
        """
        if 'parse' not in request.item:
            raise AttributeError('request.item not attribute "parse"')

        yield request

    def detail_get(self, request, response):
        """Build a ``DataBakItem`` from the task and the notice HTML.

        The HTML is read from ``data.noticeContent.notCont`` of the JSON
        payload; every span from ``data:image`` up to the next space is
        removed before it is stored in ``contenthtml``.
        """
        item = request.item
        data_item = DataBakItem(**item)

        html = response.json.get('data').get('noticeContent').get('notCont')
        html = re.sub(r'data:image(.*?) ', '', html, flags=re.S | re.M)
        data_item.contenthtml = html

        yield data_item

    def exception_request(self, request, response):
        """Drop the cached session, rotate proxy and fingerprint, then retry.

        Presumably invoked by the framework when a request fails; confirm
        against the base spider.
        """
        self.alteon_pcgmh = None
        self.cookies = None
        self.proxy = get_proxy()
        request.params['mhId'] = get_fingerprint()
        yield request
if __name__ == "__main__":
    # Script entry point: build the spider for its redis key, then run it.
    spider = Spider(redis_key="lzz:Gtcgpt")
    spider.start()
|