招标信息-详情页.py 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on 2025-04-29
  4. ---------
  5. @summary: 中国联通采购与招标网
  6. ---------
  7. @author: lzz
  8. """
  9. import feapder
  10. from items.spider_item import DataBakItem
  11. from untils.WebCookiePool import WebCookiePool
  12. headers = {
  13. "Accept": "*/*",
  14. "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
  15. "Authorization": "null",
  16. "Connection": "keep-alive",
  17. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
  18. "roleId;": ""
  19. }
  20. cookie_pool = WebCookiePool(redis_key="zgydcgyzbw_ck",
  21. page_url="http://www.chinaunicombidding.cn/bidInformation",
  22. cookie_key="jqmEwVYRfTEJT",
  23. headless=True)
  24. class Spider(feapder.BiddingDetailSpider):
  25. def start_requests(self):
  26. data_lsit = self.get_tasks_by_rabbitmq(limit=50)
  27. for item in data_lsit:
  28. request_params = item.get("request_params")
  29. timeout = request_params.pop('timeout', 10)
  30. yield feapder.Request(url=item.get("parse_url"),
  31. proxies=False,
  32. timeout=timeout,
  33. callback=eval(item.get("parse")),
  34. item=item,
  35. deal_detail=item.get("deal_detail"),
  36. **request_params)
  37. def download_midware(self, request):
  38. request.headers = headers
  39. request.cookies = cookie_pool.get_cookie()
  40. def validate(self, request, response):
  41. if response.status_code != 200:
  42. raise ConnectionRefusedError
  43. def detail_get(self, request, response):
  44. items = request.item
  45. data_item = DataBakItem(**items)
  46. html = response.json.get('data').get('annoText')
  47. data_item.contenthtml = html
  48. yield data_item
  49. def exception_request(self, request, response):
  50. cookie_pool.del_cookie(cookie_pool.get_cookie())
  51. yield request
  52. if __name__ == "__main__":
  53. Spider(redis_key="lzz:zgydcgyzbw_cgxqgs").start()