# -*- coding: utf-8 -*- """ Created on 2025-01-06 --------- @summary: 东方希望数字化采购平台 --------- @author: lzz """ import feapder from items.spider_item import DataBakItem class Spider(feapder.BiddingDetailSpider): def start_requests(self): data_list = self.get_tasks_by_rabbitmq(limit=50) for item in data_list: # log.debug(item) request_params = item.get("request_params") yield feapder.Request(url=item.get("parse_url"), render=True, render_time=5, callback=eval(item.get("parse")), item=item, deal_detail=item.get("deal_detail"), **request_params) def download_midware(self, request): request.headers = { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", "Cache-Control": "max-age=0", "Upgrade-Insecure-Requests": "1", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", } def detail_get(self, request, response): items = request.item data_item = DataBakItem(**items) html = '' for xpath in request.deal_detail: html = response.xpath(xpath).extract_first() # 标书详细内容 if html is not None: break html = html.replace('我要参与', '').replace('我要提问', '') html = html.replace('