1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465 |
- # -*- coding: utf-8 -*-
- """
- Created on 2025-04-29
- ---------
- @summary: China Unicom Procurement and Bidding Network (中国联通采购与招标网)
- ---------
- @author: lzz
- """
- import feapder
- from items.spider_item import DataBakItem
- from untils.WebCookiePool import WebCookiePool
# Default HTTP headers attached to every outgoing request
# (assigned in Spider.download_midware).
headers = {
    "Accept": "*/*",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    # The literal string "null" is sent as the Authorization value.
    "Authorization": "null",
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    # Empty-valued header. The key used to be "roleId;" -- the trailing
    # semicolon is presumably left over from curl's `-H 'roleId;'` notation
    # for "send this header with no value" and is not part of the header
    # name. NOTE(review): confirm against a live browser request.
    "roleId": "",
}
# Module-level cookie pool shared by all requests of this spider.
# NOTE(review): presumably WebCookiePool loads `page_url` in a headless
# browser and stores the cookies (identified by `cookie_key`) under
# `redis_key` -- confirm against untils.WebCookiePool.
cookie_pool = WebCookiePool(
    redis_key="zgydcgyzbw_ck",
    page_url="http://www.chinaunicombidding.cn/bidInformation",
    cookie_key="jqmEwVYRfTEJT",
    headless=True,
)
class Spider(feapder.BiddingDetailSpider):
    """Detail-page spider for announcements of the China Unicom bidding site.

    Tasks are obtained via ``get_tasks_by_rabbitmq``, requested with the
    module-level ``headers`` and a cookie from ``cookie_pool``, and the
    announcement HTML found in the JSON response is emitted as a
    ``DataBakItem``.
    """

    def start_requests(self):
        """Yield one ``feapder.Request`` per task.

        Each task dict supplies ``parse_url``, ``parse`` (the callback
        expression), ``deal_detail`` and, optionally, ``request_params``.
        A ``timeout`` entry inside ``request_params`` overrides the default
        of 10; the remaining entries are forwarded to the request as
        keyword arguments.
        """
        tasks = self.get_tasks_by_rabbitmq(limit=50)
        for item in tasks:
            # A task without "request_params" used to crash on None.pop();
            # treat it as "no extra parameters" instead.
            request_params = item.get("request_params") or {}
            timeout = request_params.pop('timeout', 10)
            yield feapder.Request(
                url=item.get("parse_url"),
                proxies=False,
                timeout=timeout,
                # SECURITY NOTE(review): eval() executes whatever expression
                # the task carries (e.g. "self.detail_get"). It is kept for
                # compatibility with existing tasks, but the task queue must
                # be trusted; consider resolving the callback by name with
                # getattr() instead.
                callback=eval(item.get("parse")),
                item=item,
                deal_detail=item.get("deal_detail"),
                **request_params
            )

    def download_midware(self, request):
        """Attach the default headers and a pooled cookie to ``request``."""
        # Hand each request its own copy so that a later change to one
        # request's headers cannot leak into the shared module-level dict.
        request.headers = dict(headers)
        request.cookies = cookie_pool.get_cookie()

    def validate(self, request, response):
        """Reject every response whose HTTP status is not 200.

        Raises:
            ConnectionRefusedError: if ``response.status_code`` is not 200.
        """
        if response.status_code != 200:
            raise ConnectionRefusedError(
                "unexpected HTTP status %s" % response.status_code
            )

    def detail_get(self, request, response):
        """Build the result item from the task and the JSON response.

        The item is created from the task dict stored on ``request.item``;
        its ``contenthtml`` is taken from ``data.annoText`` of the JSON body.
        A response without a ``data`` object raises ``AttributeError``.
        """
        data_item = DataBakItem(**request.item)
        data_item.contenthtml = response.json.get('data').get('annoText')
        yield data_item

    def exception_request(self, request, response):
        """Discard the cookie of a failed request and schedule it again."""
        # Delete the cookie this request was actually sent with. Asking the
        # pool for a cookie here (the previous behaviour) is not guaranteed
        # to return that same cookie, so a still-valid one could be removed
        # while the bad one stays in the pool. Fall back to the old
        # behaviour only when the request carries no cookie at all.
        used_cookie = getattr(request, "cookies", None) or cookie_pool.get_cookie()
        cookie_pool.del_cookie(used_cookie)
        yield request
if __name__ == "__main__":
    # Start the spider only when this file is executed as a script.
    spider = Spider(redis_key="lzz:zgydcgyzbw_cgxqgs")
    spider.start()
|