# Source file: 招标信息-详情页.py ("bidding information - detail page")
# -*- coding: utf-8 -*-
"""
Created on 2025-04-29
---------
@summary: China Unicom Procurement and Bidding Network (中国联通采购与招标网)
---------
@author: lzz
"""
import feapder
from items.spider_item import DataBakItem
# NOTE: "untils" (sic) is the project's actual package name — do not "fix" the path.
from untils.WebCookiePool import WebCookiePool
  12. headers = {
  13. "Accept": "*/*",
  14. "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
  15. "Authorization": "null",
  16. "Connection": "keep-alive",
  17. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
  18. "roleId;": ""
  19. }
  20. class Spider(feapder.BiddingDetailSpider):
  21. def start_callback(self):
  22. self.cookie_pool = WebCookiePool(redis_key="zgydcgyzbw_ck",
  23. page_url="http://www.chinaunicombidding.cn/bidInformation",
  24. cookie_key="jqmEwVYRfTEJT")
  25. self.cookie_pool.user_agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
  26. def start_requests(self):
  27. data_lsit = self.get_tasks_by_rabbitmq(limit=50)
  28. for item in data_lsit:
  29. request_params = item.get("request_params")
  30. timeout = request_params.pop('timeout', 10)
  31. yield feapder.Request(url=item.get("parse_url"),
  32. timeout=timeout,
  33. proxies=False,
  34. callback=eval(item.get("parse")),
  35. item=item,
  36. deal_detail=item.get("deal_detail"),
  37. **request_params)
  38. def download_midware(self, request):
  39. request.headers = headers
  40. request.cookies = self.cookie_pool.get_cookie()
  41. def validate(self, request, response):
  42. if response.status_code != 200:
  43. raise ConnectionRefusedError
  44. return True
  45. def detail_get(self, request, response):
  46. items = request.item
  47. data_item = DataBakItem(**items)
  48. html = response.json.get('data').get('annoText')
  49. data_item.contenthtml = html
  50. yield data_item
  51. def exception_request(self, request, response):
  52. self.cookie_pool.del_cookie(self.cookie_pool.get_cookie())
  53. yield request
if __name__ == "__main__":
    # Entry point: run the detail spider against its Redis task key.
    Spider(redis_key="lzz:zgydcgyzbw_cgxqgs").start()