webdriver_pool.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
# -*- coding: utf-8 -*-
"""
Created on 2021/3/18 4:59 PM
---------
@summary: Pool of webdriver instances, shared through a queue or bound one per thread.
---------
@author: Boris
@email: boris_liu@foxmail.com
"""
  10. import queue
  11. import threading
  12. from feapder.utils.log import log
  13. from feapder.utils.tools import Singleton
  14. from feapder.utils.webdriver.selenium_driver import SeleniumDriver
  15. @Singleton
  16. class WebDriverPool:
  17. def __init__(
  18. self, pool_size=5, driver_cls=SeleniumDriver, thread_safe=False, **kwargs
  19. ):
  20. """
  21. Args:
  22. pool_size: driver池的大小
  23. driver: 驱动类型
  24. thread_safe: 是否线程安全
  25. 是则每个线程拥有一个driver,pool_size无效,driver数量为线程数
  26. 否则每个线程从池中获取driver
  27. **kwargs:
  28. """
  29. self.pool_size = pool_size
  30. self.driver_cls = driver_cls
  31. self.thread_safe = thread_safe
  32. self.kwargs = kwargs
  33. self.queue = queue.Queue(maxsize=pool_size)
  34. self.lock = threading.RLock()
  35. self.driver_count = 0
  36. self.ctx = threading.local()
  37. @property
  38. def driver(self):
  39. if not hasattr(self.ctx, "driver"):
  40. self.ctx.driver = None
  41. return self.ctx.driver
  42. @driver.setter
  43. def driver(self, driver):
  44. self.ctx.driver = driver
  45. @property
  46. def is_full(self):
  47. return self.driver_count >= self.pool_size
  48. def create_driver(self, user_agent: str = None, proxy: str = None):
  49. kwargs = self.kwargs.copy()
  50. if user_agent:
  51. kwargs["user_agent"] = user_agent
  52. if proxy:
  53. kwargs["proxy"] = proxy
  54. return self.driver_cls(**kwargs)
  55. def get(self, user_agent: str = None, proxy: str = None):
  56. """
  57. 获取webdriver
  58. 当webdriver为新实例时会使用 user_agen, proxy, cookie参数来创建
  59. Args:
  60. user_agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36
  61. proxy: xxx.xxx.xxx.xxx
  62. Returns:
  63. """
  64. if not self.is_full and not self.thread_safe:
  65. with self.lock:
  66. if not self.is_full:
  67. driver = self.create_driver(user_agent, proxy)
  68. self.queue.put(driver)
  69. self.driver_count += 1
  70. elif self.thread_safe:
  71. if not self.driver:
  72. driver = self.create_driver(user_agent, proxy)
  73. self.driver = driver
  74. self.driver_count += 1
  75. if self.thread_safe:
  76. driver = self.driver
  77. else:
  78. driver = self.queue.get()
  79. return driver
  80. def put(self, driver):
  81. if not self.thread_safe:
  82. self.queue.put(driver)
  83. def remove(self, driver):
  84. if self.thread_safe:
  85. if self.driver:
  86. self.driver.quit()
  87. self.driver = None
  88. else:
  89. driver.quit()
  90. self.driver_count -= 1
  91. def close(self):
  92. if self.thread_safe:
  93. log.info("暂不支持关闭需线程安全的driver")
  94. while not self.queue.empty():
  95. driver = self.queue.get()
  96. driver.quit()
  97. self.driver_count -= 1