webdriver.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. import datetime
  2. from collections import namedtuple
  3. from pathlib import Path
  4. from selenium import webdriver
  5. from selenium.common.exceptions import WebDriverException
  6. from selenium.webdriver.common.by import By
  7. from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
  8. from selenium.webdriver.support import expected_conditions as EC
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from common.log import logger
  11. _absolute = Path(__file__).absolute().parent.parent
  12. _date = datetime.datetime.now().strftime('%Y-%m-%d')
  13. _service_log_path = (_absolute / f'logs/geckodriver-{_date}.log').resolve()
  14. DEFAULT_USERAGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:96.0) Gecko/20100101 Firefox/96.0"
  15. Netloc = namedtuple('Netloc', ['host', 'port'])
  16. def check_navigator(driver):
  17. script = "return window.navigator.webdriver"
  18. return driver.execute_script(script)
  19. def netloc(proxies: dict) -> Netloc:
  20. host, port = proxies["https"].replace("socks5://", "").split(":")
  21. return Netloc(host, port)
  22. class FireFoxWebDriverError(WebDriverException):
  23. pass
  24. class WebDriver(RemoteWebDriver):
  25. FIREFOX = "FIREFOX"
  26. def __init__(
  27. self,
  28. load_images=True,
  29. user_agent=None,
  30. proxy=None,
  31. headless=True,
  32. driver_type=FIREFOX,
  33. timeout=120,
  34. window_size=(1024, 800),
  35. executable_path=None,
  36. custom_argument=None,
  37. **kwargs
  38. ):
  39. """
  40. Args:
  41. load_images: 是否加载图片
  42. user_agent: 字符串 或 无参函数,返回值为user_agent
  43. proxy: {'https://sockets:xxx.xxx.xxx.xxx:xxxx'} 或 无参函数,返回值为代理地址
  44. headless: 是否启用无头模式
  45. driver_type: FIREFOX
  46. timeout: 请求超时时间
  47. window_size: # 窗口大小
  48. executable_path: 浏览器路径,默认为默认路径
  49. **kwargs:
  50. """
  51. self._load_images = load_images
  52. self._user_agent = user_agent or DEFAULT_USERAGENT
  53. self._proxy = proxy
  54. self._headless = headless
  55. self._timeout = timeout
  56. self._window_size = window_size
  57. self._executable_path = executable_path
  58. self._custom_argument = custom_argument
  59. self.proxies = {}
  60. self.user_agent = None
  61. if driver_type == WebDriver.FIREFOX:
  62. self.driver = self.firefox_driver()
  63. self.driver.set_page_load_timeout(self._timeout)
  64. self.driver.set_script_timeout(self._timeout)
  65. def __enter__(self):
  66. return self
  67. def __exit__(self, exc_type, exc_val, exc_tb):
  68. if exc_val:
  69. logger.error(f'{self.__class__.__name__} <> {exc_type.__name__}: {exc_val}')
  70. self.driver.quit()
  71. return True
  72. def firefox_driver(self):
  73. firefox_profile = webdriver.FirefoxProfile()
  74. firefox_options = webdriver.FirefoxOptions()
  75. firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX
  76. firefox_profile.set_preference("dom.webdriver.enabled", False)
  77. firefox_profile.set_preference('useAutomationExtension', False)
  78. # firefox_profile.set_preference('privacy.resistFingerprinting', True) # 启用指纹保护
  79. if self._proxy:
  80. proxy = self._proxy() if callable(self._proxy) else self._proxy
  81. host, port = netloc(proxy)
  82. # 使用socks5 代理, 不使用代理:0, 使用代理:1
  83. firefox_profile.set_preference('network.proxy.type', 1)
  84. firefox_profile.set_preference('network.proxy.socks', host)
  85. firefox_profile.set_preference('network.proxy.socks_port', int(port))
  86. if self._user_agent:
  87. firefox_profile.set_preference(
  88. "general.useragent.override",
  89. self._user_agent() if callable(self._user_agent) else self._user_agent,
  90. )
  91. if not self._load_images:
  92. '''
  93. 允许加载所有图像,无论来源如何(默认)=1
  94. 阻止所有图像加载=2
  95. 防止加载第三方图像=3
  96. '''
  97. firefox_profile.set_preference("permissions.default.image", 2)
  98. firefox_profile.update_preferences()
  99. if self._headless:
  100. firefox_options.add_argument("--headless")
  101. firefox_options.add_argument("--disable-gpu")
  102. # 添加自定义的配置参数
  103. if self._custom_argument:
  104. for arg in self._custom_argument:
  105. firefox_options.add_argument(arg)
  106. if self._executable_path:
  107. driver = webdriver.Firefox(
  108. service_log_path=str(_service_log_path),
  109. capabilities=firefox_capabilities,
  110. options=firefox_options,
  111. firefox_profile=firefox_profile,
  112. executable_path=self._executable_path,
  113. )
  114. else:
  115. driver = webdriver.Firefox(
  116. service_log_path=str(_service_log_path),
  117. capabilities=firefox_capabilities,
  118. options=firefox_options,
  119. firefox_profile=firefox_profile,
  120. )
  121. if self._window_size:
  122. driver.set_window_size(*self._window_size)
  123. return driver
  124. def quit(self):
  125. self.driver.quit()
  126. @property
  127. def cookies(self):
  128. cookies_json = {}
  129. for cookie in self.driver.get_cookies():
  130. cookies_json[cookie["name"]] = cookie["value"]
  131. return cookies_json
  132. @cookies.setter
  133. def cookies(self, val: dict):
  134. """
  135. 设置cookie
  136. Args:
  137. val: {"key":"value", "key2":"value2"}
  138. Returns:
  139. """
  140. for key, value in val.items():
  141. self.driver.add_cookie({"name": key, "value": value})
  142. def __getattr__(self, name):
  143. if self.driver:
  144. return getattr(self.driver, name)
  145. else:
  146. raise AttributeError
  147. def get_user_agent(driver):
  148. return driver.execute_script("return navigator.userAgent;")
  149. def get_title(driver):
  150. return driver.execute_script('return document.title')
  151. def until_wait(
  152. driver,
  153. *,
  154. xpath=None,
  155. classname=None,
  156. text=None,
  157. timeout=None
  158. ):
  159. """
  160. 显示等待页面加载,否则抛出TimeoutException
  161. :param driver: 浏览器驱动
  162. :param xpath: xpath规则,页面等待特征
  163. :param classname: class属性名称,页面等待特征
  164. :param text: 期待的文本
  165. :param timeout: 超时时间
  166. :return:
  167. """
  168. _timeout = (timeout or 60)
  169. wait = WebDriverWait(driver, _timeout, 0.2)
  170. if xpath is not None:
  171. locator = (By.XPATH, xpath)
  172. if text is not None:
  173. wait.until(EC.text_to_be_present_in_element(locator, text))
  174. else:
  175. wait.until(EC.presence_of_element_located(locator))
  176. elif classname is not None:
  177. locator = (By.CLASS_NAME, classname)
  178. if text is not None:
  179. wait.until(EC.text_to_be_present_in_element(locator, text))
  180. else:
  181. wait.until(EC.presence_of_element_located(locator))
  182. def new_window(driver):
  183. """新的窗口"""
  184. driver.execute_script('window.open();')
  185. handles = driver.window_handles
  186. driver.switch_to.window(handles[-1])