交易信息-详情页.py 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  # -*- coding: utf-8 -*-
  """
  Created on 2025-06-02
  ---------
  @summary: Wuhu Online Intermediary Service Supermarket (芜湖市网上中介超市)
  ---------
  @author: lzz
  """
  9. import feapder
  10. from items.spider_item import DataBakItem
  11. from untils.attachment import AttachmentDownloader
  12. from untils.tools import extract_file_type
  13. import requests
  def get_file(did):
      """Fetch the attachment list for one trading-information record.

      Sends a form-encoded POST with the record id to the site's
      ``getTradingInformationDetails.do`` endpoint and returns the
      ``attachmentDtos`` entry found under ``data`` in the JSON reply.

      Args:
          did: Record id, sent as the ``id`` form field.

      Returns:
          The ``attachmentDtos`` value from the reply, or an empty list when
          the reply has no ``data`` object or no attachments.

      Raises:
          requests.RequestException: On connection failure or timeout.
          ValueError: If the response body cannot be decoded as JSON.
      """
      headers = {
          "Accept": "application/json, text/plain, */*",
          "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
          "Cache-Control": "no-cache",
          "Connection": "keep-alive",
          "Content-Type": "application/x-www-form-urlencoded",
          "Origin": "https://wh.ahzwfw.gov.cn",
          "Pragma": "no-cache",
          "Referer": "https://wh.ahzwfw.gov.cn/wszjcs-web/views/projectnotice/projectnotice.html",
          "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
      }
      url = "https://wh.ahzwfw.gov.cn/wszjcs-web/tradingInformation/getTradingInformationDetails.do"
      data = {
          "id": did,
      }
      # NOTE(review): TLS certificate verification is disabled here; kept
      # as in the original, confirm this is still required for the site.
      response = requests.post(url, headers=headers, data=data, timeout=30, verify=False)
      # "data" may be absent or null in the reply; chaining .get() on it
      # directly would raise AttributeError, so fall back to an empty dict.
      detail = response.json().get('data') or {}
      return detail.get('attachmentDtos') or []
  class Details(feapder.BiddingDetailSpider):
      """Detail-page spider: renders each task's page and collects attachments."""

      def start_requests(self):
          """Yield one rendered request per task pulled from the queue.

          Each task dict supplies ``parse_url``, ``deal_detail``, ``parse``
          (the callback expression), ``did`` and optional ``request_params``
          that are forwarded to ``feapder.Request`` as keyword arguments.
          """
          # Tolerate an empty/None batch instead of failing on iteration.
          task_list = self.get_tasks_by_rabbitmq(limit=10) or []
          for item in task_list:
              # A task without "request_params" would otherwise crash on .get().
              request_params = item.get("request_params") or {}
              # Pull "timeout" out so it is not passed twice to Request below.
              timeout = request_params.pop('timeout', 10)
              # SECURITY NOTE(review): "parse" comes from the task payload and
              # is passed to eval(); this is only safe while the queue content
              # is fully trusted. It is evaluated here so names such as `self`
              # resolve in this method's scope.
              callback = eval(item.get("parse"))
              yield feapder.Request(
                  url=item.get("parse_url"),
                  item=item,
                  proxies=False,
                  render=True,
                  render_time=5,
                  deal_detail=item.get("deal_detail"),
                  callback=callback,
                  **request_params,
                  timeout=timeout,
                  did=item.get('did'),
              )

      def detail_get(self, request, response):
          """Build the detail item: page HTML plus any downloadable attachments.

          Args:
              request: The request yielded by ``start_requests``; its ``item``
                  and ``did`` attributes are read here.
              response: Rendered page response; the ``div.detail`` node is
                  stored as the item's ``contenthtml``.

          Yields:
              A ``DataBakItem`` with ``contenthtml`` set and, when at least one
              attachment was fetched, ``projectinfo`` holding them keyed by
              "1", "2", ...
          """
          list_item = DataBakItem(**request.item)
          html = response.xpath('//div[@class="detail"]').extract_first()

          attachments = {}
          for info in get_file(request.did) or []:
              # fileName may be missing/null; avoid calling .strip() on None.
              file_name = (info.get('fileName') or '').strip()
              fid = info.get('filePath')
              if not fid:
                  # Without a file id the download URL would be bogus.
                  continue
              file_url = f"https://wh.ahzwfw.gov.cn/wszjcs-web/file/downloadFile.do?fileId={fid}&fileName={file_name}"
              file_type = extract_file_type(file_name=file_name, file_url=fid)
              if not file_type:
                  continue
              attachment = AttachmentDownloader().fetch_attachment(
                  file_name=file_name, file_type=file_type, download_url=file_url,
              )
              # Keys are 1-based positions among the attachments kept so far.
              attachments[str(len(attachments) + 1)] = attachment

          if attachments:
              list_item.projectinfo = {"attachments": attachments}
          list_item.contenthtml = html
          yield list_item
  if __name__ == "__main__":
      # Script entry point: start the detail spider on its redis key.
      Details(redis_key="lzz:wnjtdzzbcgpt_jggs").start()