123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566 |
- # -*- coding: utf-8 -*-
- """
- Created on 2025-04-19
- ---------
- @summary: 甘肃省公共资源交易网
- ---------
- @author: lzz
- """
- import time
- import feapder
- from feapder.utils.tools import log
- from items.spider_item import DataBakItem
- from untils.attachment import AttachmentDownloader
- from untils.tools import extract_file_type
# HTTP headers attached to every detail request issued by the spider below.
# The values describe an XHR-style, form-encoded request from desktop Chrome.
headers = {
    "Accept": "*/*",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "Origin": "http://47.110.59.239:9207",
    "Pragma": "no-cache",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
    ),
    "X-Requested-With": "XMLHttpRequest",
}
class FirefoxDetails(feapder.BiddingDetailSpider):
    """Detail-page spider driven by queued tasks.

    Tasks are pulled with ``get_tasks_by_rabbitmq``; each task's ``parse_url``
    is requested and the response is turned into a ``DataBakItem`` carrying the
    page's main content HTML plus any attachments referenced by ``<iframe>``
    elements.
    """

    def start_requests(self):
        """Yield one ``feapder.Request`` per queued task (up to 30 per batch)."""
        data_list = self.get_tasks_by_rabbitmq(limit=30)
        for item in data_list:
            # log.debug(item)
            # A task may carry no extra request options; unpacking ``None``
            # with ``**`` would raise TypeError, so fall back to an empty dict.
            request_params = item.get("request_params") or {}
            yield feapder.Request(
                url=item.get("parse_url"),
                item=item,
                files_info=item.get("files"),
                deal_detail=item.get("deal_detail"),
                **request_params,
                headers=headers,
                # NOTE(review): ``eval`` executes the task-supplied ``parse``
                # string (evaluated here so that ``self`` is in scope). This is
                # only safe while the task queue is fully trusted -- consider
                # replacing it with an explicit name -> method lookup.
                callback=eval(item.get("parse")),
            )

    def detail_get(self, request, response):
        """Build and yield the item for a single detail page.

        Args:
            request: the originating request; ``request.item`` holds the task
                fields used to construct the ``DataBakItem``.
            response: the fetched page; queried via XPath.

        Yields:
            The populated ``DataBakItem``. ``projectinfo`` is set only when at
            least one attachment was collected.
        """
        items = request.item
        list_item = DataBakItem(**items)

        html = response.xpath('//div[@class="jxTenderObjMain"]').extract_first("")
        list_item.contenthtml = html

        # Attachments are keyed "1", "2", ... in the order they are collected.
        attachments = {}
        for info in response.xpath('//iframe'):
            file_url = info.xpath('./@src').extract_first("")
            if not file_url:
                # An iframe without a src has nothing to download.
                continue
            file_type = extract_file_type(file_url=file_url)
            if not file_type:
                continue
            attachment = AttachmentDownloader().fetch_attachment(
                file_name=list_item.title,
                file_type=file_type,
                download_url=file_url,
            )
            attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item
if __name__ == "__main__":
    # Script entry point: build the spider with its redis key and start it.
    spider = FirefoxDetails(redis_key="lzz:gssggzyjyw_tdhkyq")
    spider.start()
|