政府采购-详情页.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
# -*- coding: utf-8 -*-
"""
Created on 2025-04-19
---------
@summary: Gansu Province Public Resources Trading Network (甘肃省公共资源交易网)
          - government procurement, detail pages
---------
@author: lzz
"""
  9. import time
  10. import feapder
  11. from feapder.utils.tools import log
  12. from items.spider_item import DataBakItem
  13. from untils.attachment import AttachmentDownloader
  14. from untils.tools import extract_file_type,text_search
  15. headers = {
  16. "Accept": "*/*",
  17. "Accept-Language": "zh-CN,zh;q=0.9",
  18. "Cache-Control": "no-cache",
  19. "Connection": "keep-alive",
  20. "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
  21. "Origin": "http://47.110.59.239:9207",
  22. "Pragma": "no-cache",
  23. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
  24. "X-Requested-With": "XMLHttpRequest"
  25. }
  26. class FirefoxDetails(feapder.BiddingDetailSpider):
  27. def start_requests(self):
  28. data_list = self.get_tasks_by_rabbitmq(limit=30)
  29. for item in data_list:
  30. # log.debug(item)
  31. request_params = item.get("request_params")
  32. yield feapder.Request(url=item.get("parse_url"), item=item, files_info=item.get("files"),
  33. deal_detail=item.get("deal_detail"), **request_params, headers=headers,
  34. callback=eval(item.get("parse")))
  35. def detail_get(self, request, response):
  36. items = request.item
  37. list_item = DataBakItem(**items)
  38. html = response.xpath('//div[@class="jxGonggaoInformationDetail"]').extract_first("")
  39. attachments = {}
  40. file__list = response.xpath('//div[@class="jxGonggaoInformationDetail"]//iframe|//img')
  41. if file__list:
  42. for index,info in enumerate(file__list):
  43. index += 1
  44. file_url = info.xpath('./@src').extract_first("")
  45. file_type = extract_file_type(file_url=file_url)
  46. if file_url and file_type:
  47. attachment = AttachmentDownloader().fetch_attachment(
  48. file_name=f'{index}', file_type=file_type, download_url=file_url)
  49. attachments[str(len(attachments) + 1)] = attachment
  50. if attachments:
  51. list_item.projectinfo = {"attachments": attachments}
  52. if text_search(html).total < 10:
  53. html = "详情请访问原网页!"
  54. list_item.contenthtml = html
  55. yield list_item
  56. if __name__ == "__main__":
  57. FirefoxDetails(redis_key="lzz:gssggzyjyw_zfcg").start()