_selenium.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on 2022/7/26 4:28 PM
  4. ---------
  5. @summary: Downloader that renders pages with a Selenium-driven browser
  6. ---------
  7. @author: Boris
  8. @email: boris_liu@foxmail.com
  9. """
  10. import feapder.setting as setting
  11. import feapder.utils.tools as tools
  12. from feapder.network.downloader.base import RenderDownloader
  13. from feapder.network.response import Response
  14. from feapder.utils.webdriver import WebDriverPool, SeleniumDriver
  15. class SeleniumDownloader(RenderDownloader):
  16. webdriver_pool: WebDriverPool = None
  17. @property
  18. def _webdriver_pool(self):
  19. if not self.__class__.webdriver_pool:
  20. self.__class__.webdriver_pool = WebDriverPool(
  21. **setting.WEBDRIVER, driver=SeleniumDriver
  22. )
  23. return self.__class__.webdriver_pool
  24. def download(self, request) -> Response:
  25. # 代理优先级 自定义 > 配置文件 > 随机
  26. if request.custom_proxies:
  27. proxy = request.get_proxy()
  28. elif setting.WEBDRIVER.get("proxy"):
  29. proxy = setting.WEBDRIVER.get("proxy")
  30. else:
  31. proxy = request.get_proxy()
  32. # user_agent优先级 自定义 > 配置文件 > 随机
  33. if request.custom_ua:
  34. user_agent = request.get_user_agent()
  35. elif setting.WEBDRIVER.get("user_agent"):
  36. user_agent = setting.WEBDRIVER.get("user_agent")
  37. else:
  38. user_agent = request.get_user_agent()
  39. cookies = request.get_cookies()
  40. url = request.url
  41. render_time = request.render_time or setting.WEBDRIVER.get("render_time")
  42. if request.get_params():
  43. url = tools.joint_url(url, request.get_params())
  44. browser: SeleniumDriver = self._webdriver_pool.get(
  45. user_agent=user_agent, proxy=proxy
  46. )
  47. try:
  48. browser.get(url)
  49. if cookies:
  50. browser.cookies = cookies
  51. # 刷新使cookie生效
  52. browser.get(url)
  53. if render_time:
  54. tools.delay_time(render_time)
  55. html = browser.page_source
  56. response = Response.from_dict(
  57. {
  58. "url": browser.current_url,
  59. "cookies": browser.cookies,
  60. "_content": html.encode(),
  61. "status_code": 200,
  62. "elapsed": 666,
  63. "headers": {
  64. "User-Agent": browser.user_agent,
  65. "Cookie": tools.cookies2str(browser.cookies),
  66. },
  67. }
  68. )
  69. response.driver = browser
  70. response.browser = browser
  71. return response
  72. except Exception as e:
  73. self._webdriver_pool.remove(browser)
  74. raise e
  75. def close(self, driver):
  76. if driver:
  77. self._webdriver_pool.remove(driver)
  78. def put_back(self, driver):
  79. """
  80. 释放浏览器对象
  81. """
  82. self._webdriver_pool.put(driver)
  83. def close_all(self):
  84. """
  85. 关闭所有浏览器
  86. """
  87. self._webdriver_pool.close()