_playwright.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
# -*- coding: utf-8 -*-
"""
Created on 2022/9/7 4:05 PM
---------
@summary: Render downloader that loads pages through a pooled Playwright browser
---------
@author: Boris
@email: boris_liu@foxmail.com
"""
  10. import feapder.setting as setting
  11. import feapder.utils.tools as tools
  12. from feapder.network.downloader.base import RenderDownloader
  13. from feapder.network.response import Response
  14. from feapder.utils.webdriver import WebDriverPool, PlaywrightDriver
  15. class PlaywrightDownloader(RenderDownloader):
  16. webdriver_pool: WebDriverPool = None
  17. @property
  18. def _webdriver_pool(self):
  19. if not self.__class__.webdriver_pool:
  20. self.__class__.webdriver_pool = WebDriverPool(
  21. **setting.PLAYWRIGHT, driver_cls=PlaywrightDriver, thread_safe=True
  22. )
  23. return self.__class__.webdriver_pool
  24. def download(self, request) -> Response:
  25. # 代理优先级 自定义 > 配置文件 > 随机
  26. if request.custom_proxies:
  27. proxy = request.get_proxy()
  28. elif setting.PLAYWRIGHT.get("proxy"):
  29. proxy = setting.PLAYWRIGHT.get("proxy")
  30. else:
  31. proxy = request.get_proxy()
  32. # user_agent优先级 自定义 > 配置文件 > 随机
  33. if request.custom_ua:
  34. user_agent = request.get_user_agent()
  35. elif setting.PLAYWRIGHT.get("user_agent"):
  36. user_agent = setting.PLAYWRIGHT.get("user_agent")
  37. else:
  38. user_agent = request.get_user_agent()
  39. cookies = request.get_cookies()
  40. url = request.url
  41. render_time = request.render_time or setting.PLAYWRIGHT.get("render_time")
  42. wait_until = setting.PLAYWRIGHT.get("wait_until") or "domcontentloaded"
  43. if request.get_params():
  44. url = tools.joint_url(url, request.get_params())
  45. driver: PlaywrightDriver = self._webdriver_pool.get(
  46. user_agent=user_agent, proxy=proxy
  47. )
  48. try:
  49. if cookies:
  50. driver.url = url
  51. driver.cookies = cookies
  52. http_response = driver.page.goto(url, wait_until=wait_until)
  53. status_code = http_response.status
  54. if render_time:
  55. tools.delay_time(render_time)
  56. html = driver.page.content()
  57. response = Response.from_dict(
  58. {
  59. "url": driver.page.url,
  60. "cookies": driver.cookies,
  61. "_content": html.encode(),
  62. "status_code": status_code,
  63. "elapsed": 666,
  64. "headers": {
  65. "User-Agent": driver.user_agent,
  66. "Cookie": tools.cookies2str(driver.cookies),
  67. },
  68. }
  69. )
  70. response.driver = driver
  71. response.browser = driver
  72. return response
  73. except Exception as e:
  74. self._webdriver_pool.remove(driver)
  75. raise e
  76. def close(self, driver):
  77. if driver:
  78. self._webdriver_pool.remove(driver)
  79. def put_back(self, driver):
  80. """
  81. 释放浏览器对象
  82. """
  83. self._webdriver_pool.put(driver)
  84. def close_all(self):
  85. """
  86. 关闭所有浏览器
  87. """
  88. # 不支持
  89. # self._webdriver_pool.close()
  90. pass