# -*- coding: utf-8 -*-
"""
Created on 2025-04-09
---------
@summary: China Railway procurement platform (cg.95306.cn) - detail spider
---------
@author: lzz
"""
import re

import feapder
from items.spider_item import DataBakItem
from untils.tools import get_proxy
from fingerprint import get_fingerprint


class Spider(feapder.BiddingDetailSpider):
    """Detail-page spider for notices published on cg.95306.cn.

    Tasks are pulled from RabbitMQ; each task names the URL to fetch and the
    callback that parses it. The notice HTML is read from the JSON response
    and stored on a ``DataBakItem``.
    """

    def start_callback(self):
        """Initialise the session state shared by all requests.

        Sets ``self.cookies`` to ``None`` (so they are built lazily by
        ``download_midware``) and fetches a proxy via ``get_proxy()``.
        """
        self.cookies = None
        self.proxy = get_proxy()

    def start_requests(self):
        """Yield one ``feapder.Request`` per task fetched from RabbitMQ.

        Up to 100 tasks are fetched per call. Each task dict is expected to
        provide ``parse_url``, ``parse``, ``deal_detail`` and, optionally,
        ``request_params`` (extra keyword arguments for the request).
        """
        data_list = self.get_tasks_by_rabbitmq(limit=100)
        for item in data_list:
            # A task without "request_params" would make ``**None`` raise
            # TypeError and abort the whole batch; fall back to no extras.
            request_params = item.get("request_params") or {}
            # SECURITY NOTE(review): eval() executes whatever expression the
            # task stores under "parse". This is only safe while the queue
            # content is fully trusted. Left as-is to preserve behaviour;
            # consider resolving the callback by name (e.g. getattr on self)
            # once the exact format of "parse" is confirmed.
            callback = eval(item.get("parse"))
            yield feapder.Request(
                url=item.get("parse_url"),
                # Proxies are disabled here and assigned in download_midware.
                proxies=False,
                callback=callback,
                item=item,
                deal_detail=item.get("deal_detail"),
                **request_params,
            )

    def download_midware(self, request):
        """Attach headers, proxy and cookies to *request* before download.

        The cookies are built once, from the ``mhId`` parameter of the first
        request seen after a reset, and reused until ``exception_request``
        clears them.

        Raises:
            KeyError: if cookies must be built and ``request.params`` has no
                ``mhId`` entry.
        """
        headers = {
            'Accept': '*/*',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Cache-Control': 'no-cache',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Origin': 'https://cg.95306.cn',
            # The task's "href" is sent as the Referer.
            'Referer': request.item.get('href'),
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
            'X-Requested-With': 'XMLHttpRequest',
        }
        if self.cookies is None:
            self.cookies = {
                # NOTE(review): hard-coded cookie value; presumably a
                # load-balancer cookie captured from a browser session.
                'AlteonPcgmh': '0a03b7f3bb36ad3f1f41',
                'mhId': request.params['mhId'],
            }

        request.headers = headers
        request.proxies = self.proxy
        request.cookies = self.cookies

    def validate(self, request, response):
        """Accept the response only if its JSON carries a non-empty ``data``.

        Returns:
            ``True`` when ``data`` is present and truthy.

        Raises:
            ValueError: when ``data`` is missing or empty.
        """
        data = response.json.get('data')
        if not data:
            raise ValueError('数据不能为空!')
        return True

    def detail_get(self, request, response):
        """Extract the notice HTML from the response and yield the item.

        The HTML is read from ``data.noticeContent.notCont`` in the JSON
        body. Every ``data:image...`` run up to and including the next space
        is removed (presumably to drop inline base64 images) before the HTML
        is stored on ``contenthtml``.
        """
        items = request.item
        html = response.json.get('data').get('noticeContent').get('notCont')
        # Non-greedy with DOTALL: strips from "data:image" through the first
        # following space, even across newlines.
        html = re.sub(r'data:image(.*?) ', '', html, flags=re.S | re.M)

        data_item = DataBakItem(**items)
        data_item.contenthtml = html
        yield data_item

    def exception_request(self, request, response):
        """Reset the session state and re-issue the failed request.

        Clears the cached cookies, fetches a new proxy and replaces the
        request's ``mhId`` parameter with a fresh fingerprint. Because the
        cookies are cleared, the next ``download_midware`` call rebuilds them
        from that new ``mhId``.
        """
        self.cookies = None
        self.proxy = get_proxy()
        request.params['mhId'] = get_fingerprint()
        yield request


if __name__ == "__main__":
    Spider(redis_key="lzz:Gtcgpt").start()