_drissionpage.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on 2025-05-14
  4. ---------
  5. @summary:
  6. ---------
  7. @author: Dzr
  8. """
  9. import feapder.setting as setting
  10. import feapder.utils.tools as tools
  11. from feapder.network.downloader.base import RenderDownloader
  12. from feapder.network.response import Response
  13. from feapder.utils.webdriver import WebDriverPool, DrissionPageDriver
  14. class DrissionPageDownloader(RenderDownloader):
  15. webdriver_pool: WebDriverPool = None
  16. @property
  17. def _webdriver_pool(self):
  18. if not self.__class__.webdriver_pool:
  19. self.__class__.webdriver_pool = WebDriverPool(
  20. **setting.DRISSIONPAGE, driver_cls=DrissionPageDriver
  21. )
  22. return self.__class__.webdriver_pool
  23. def download(self, request) -> Response:
  24. # 代理优先级 自定义 > 配置文件 > 随机
  25. if request.custom_proxies:
  26. proxy = request.get_proxy()
  27. elif setting.DRISSIONPAGE.get("proxy"):
  28. proxy = setting.DRISSIONPAGE.get("proxy")
  29. else:
  30. proxy = request.get_proxy()
  31. # user_agent优先级 自定义 > 配置文件 > 随机
  32. if request.custom_ua:
  33. user_agent = request.get_user_agent()
  34. elif setting.DRISSIONPAGE.get("user_agent"):
  35. user_agent = setting.DRISSIONPAGE.get("user_agent")
  36. else:
  37. user_agent = request.get_user_agent()
  38. cookies = request.get_cookies()
  39. url = request.url
  40. render_time = request.render_time or setting.DRISSIONPAGE.get("render_time")
  41. if request.get_params():
  42. url = tools.joint_url(url, request.get_params())
  43. driver: DrissionPageDriver = self._webdriver_pool.get(
  44. user_agent=user_agent, proxy=proxy
  45. )
  46. try:
  47. if cookies:
  48. driver.url = url
  49. driver.tab.set.cookies = cookies
  50. retry = setting.DRISSIONPAGE.get("retry")
  51. interval = setting.DRISSIONPAGE.get("interval")
  52. timeout = setting.DRISSIONPAGE.get("page_load")
  53. driver.tab.get(url, retry=retry, interval=interval, timeout=timeout)
  54. driver.wait_for_dom_load(timeout=render_time)
  55. if driver.tab.mode == "s":
  56. response = Response(driver.tab.response)
  57. else:
  58. cookies = driver.tab.cookies().as_dict()
  59. html = driver.tab.html
  60. response = Response.from_dict(
  61. {
  62. "url": driver.tab.url,
  63. "cookies": cookies,
  64. "_content": html.encode(),
  65. "status_code": 200,
  66. "elapsed": 666,
  67. "headers": {
  68. "User-Agent": driver.tab.user_agent,
  69. "Cookie": tools.cookies2str(cookies),
  70. },
  71. }
  72. )
  73. response.driver = driver
  74. response.browser = driver
  75. return response
  76. except Exception as e:
  77. self._webdriver_pool.remove(driver)
  78. request.del_proxy()
  79. raise e
  80. def close(self, driver):
  81. if driver:
  82. self._webdriver_pool.remove(driver)
  83. def put_back(self, driver):
  84. """
  85. 释放浏览器对象
  86. """
  87. self._webdriver_pool.put(driver)
  88. def close_all(self):
  89. """
  90. 关闭所有浏览器
  91. """
  92. self._webdriver_pool.close()