webdriver.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. import datetime
  2. from collections import namedtuple
  3. from pathlib import Path
  4. from selenium import webdriver
  5. from selenium.common.exceptions import WebDriverException
  6. from selenium.webdriver import Firefox
  7. from common.log import logger
  8. _absolute = Path(__file__).absolute().parent.parent.parent
  9. _date = datetime.datetime.now().strftime('%Y-%m-%d')
  10. SERVICE_LOG_PATH = (_absolute / f'logs/geckodriver-{_date}.log').resolve()
  11. DEFAULT_USERAGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:96.0) Gecko/20100101 Firefox/96.0"
  12. Netloc = namedtuple('Netloc', ['host', 'port'])
  13. def netloc(proxies: dict) -> Netloc:
  14. host, port = proxies["https"].replace("socks5://", "").split(":")
  15. return Netloc(host, port)
  16. class FireFoxWebDriverError(WebDriverException):
  17. pass
  18. class WebDriver(Firefox):
  19. def __init__(self, load_images=True, user_agent=None, proxy=None,
  20. headless=True, timeout=60, log_path=None,
  21. window_size=(1024, 800), executable_path=None,
  22. custom_argument=None, **kwargs):
  23. """
  24. Args:
  25. load_images: 是否加载图片
  26. user_agent: 字符串 或 无参函数,返回值为user_agent
  27. proxy: {'https://sockets:xxx.xxx.xxx.xxx:xxxx'} 或 无参函数,返回值为代理地址
  28. headless: 是否启用无头模式
  29. driver_type: FIREFOX
  30. timeout: 请求超时时间
  31. log_path: Geckodriver服务的日志文件路径
  32. window_size: 窗口大小
  33. executable_path: 浏览器路径,默认为默认路径
  34. custom_argument: 自定义配置参数
  35. **kwargs: 需要额外配置的Firefox参数
  36. """
  37. self._load_images = load_images
  38. self._user_agent = user_agent or DEFAULT_USERAGENT
  39. self._proxy = proxy
  40. self._headless = headless
  41. self._timeout = timeout
  42. self._window_size = window_size
  43. self._executable_path = executable_path
  44. self._custom_argument = custom_argument
  45. self._service_log_path = log_path or str(SERVICE_LOG_PATH)
  46. _profile = webdriver.FirefoxProfile()
  47. _options = webdriver.FirefoxOptions()
  48. firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX
  49. _profile.set_preference("dom.webdriver.enabled", False)
  50. _profile.set_preference('useAutomationExtension', False)
  51. # _profile.set_preference('privacy.resistFingerprinting', True) # 启用指纹保护
  52. if self._proxy:
  53. proxy = self._proxy() if callable(self._proxy) else self._proxy
  54. host, port = netloc(proxy)
  55. # 使用socks5 代理, 不使用代理:0, 使用代理:1
  56. _profile.set_preference('network.proxy.type', 1)
  57. _profile.set_preference('network.proxy.socks', host)
  58. _profile.set_preference('network.proxy.socks_port', int(port))
  59. if self._user_agent:
  60. _profile.set_preference(
  61. "general.useragent.override",
  62. self._user_agent() if callable(self._user_agent) else self._user_agent,
  63. )
  64. if not self._load_images:
  65. '''
  66. 允许加载所有图像,无论来源如何(默认)=1
  67. 阻止所有图像加载=2
  68. 防止加载第三方图像=3
  69. '''
  70. _profile.set_preference("permissions.default.image", 2)
  71. _profile.update_preferences()
  72. if self._headless:
  73. _options.add_argument("--headless")
  74. _options.add_argument("--disable-gpu")
  75. if self._custom_argument:
  76. [_options.add_argument(arg) for arg in self._custom_argument]
  77. if self._executable_path:
  78. super(WebDriver, self).__init__(
  79. service_log_path=self._service_log_path,
  80. capabilities=firefox_capabilities,
  81. options=_options,
  82. firefox_profile=_profile,
  83. executable_path=self._executable_path,
  84. **kwargs
  85. )
  86. else:
  87. super(WebDriver, self).__init__(
  88. service_log_path=self._service_log_path,
  89. capabilities=firefox_capabilities,
  90. options=_options,
  91. firefox_profile=_profile,
  92. **kwargs
  93. )
  94. if self._window_size:
  95. self.set_window_size(*self._window_size)
  96. self.set_page_load_timeout(self._timeout)
  97. self.set_script_timeout(self._timeout)
  98. def __enter__(self):
  99. return self
  100. def __exit__(self, exc_type, exc_val, exc_tb):
  101. if exc_val:
  102. logger.exception(f'{self.__class__.__name__} <> {exc_type.__name__}: {exc_val}')
  103. self.quit()
  104. print("关闭浏览器")
  105. return True
  106. @property
  107. def cookies(self):
  108. cookies_json = {}
  109. for cookie in self.get_cookies():
  110. cookies_json[cookie["name"]] = cookie["value"]
  111. return cookies_json
  112. @cookies.setter
  113. def cookies(self, val: dict):
  114. """
  115. 设置cookie
  116. Args:
  117. val: {"key":"value", "key2":"value2"}
  118. Returns:
  119. """
  120. for key, value in val.items():
  121. self.add_cookie({"name": key, "value": value})