123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147 |
- import datetime
- from collections import namedtuple
- from pathlib import Path
- from selenium import webdriver
- from selenium.common.exceptions import WebDriverException
- from selenium.webdriver import Firefox
- from common.log import logger
# Directory three ``.parent`` hops up from this module's absolute file path.
_absolute = Path(__file__).absolute().parent.parent.parent

# Date stamp (YYYY-MM-DD) taken once, when the module is imported.
_date = datetime.datetime.now().strftime('%Y-%m-%d')

# Default geckodriver log file: <_absolute>/logs/geckodriver-<date>.log
_service_log = _absolute / f'logs/geckodriver-{_date}.log'
SERVICE_LOG_PATH = _service_log.resolve()

# User agent applied when the caller does not supply one.
DEFAULT_USERAGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:96.0) Gecko/20100101 Firefox/96.0"
# Host/port pair extracted from a proxy address; both fields are strings.
Netloc = namedtuple('Netloc', ['host', 'port'])


def netloc(proxies: dict) -> Netloc:
    """Extract the SOCKS proxy host and port from a proxies mapping.

    Args:
        proxies: Mapping whose ``"https"`` entry is the proxy address, e.g.
            ``{"https": "socks5://127.0.0.1:1080"}``. The scheme is optional;
            when present it must be a SOCKS scheme (``socks5``, ``socks5h``,
            ...). ``user:password@`` credentials and a trailing path are
            ignored.

    Returns:
        ``Netloc(host, port)``. ``port`` is returned as a string, exactly as
        it appears in the address.

    Raises:
        KeyError: If ``proxies`` has no ``"https"`` entry.
        ValueError: If the scheme is not a SOCKS scheme, or the address does
            not contain both a host and a port.
    """
    address = proxies["https"]

    scheme, has_scheme, remainder = address.partition("://")
    if has_scheme:
        if not scheme.lower().startswith("socks"):
            raise ValueError(f"unsupported proxy scheme: {scheme!r}")
        address = remainder

    # Drop anything after the authority part ("host:port/...").
    address = address.split("/", 1)[0]
    # Drop "user:password@" credentials; they may contain ':' themselves.
    address = address.rpartition("@")[2]

    # Split on the LAST colon so that only the port is separated from the host.
    host, has_port, port = address.rpartition(":")
    if not (has_port and host and port):
        raise ValueError(
            f"proxy address must look like 'socks5://host:port', got {address!r}"
        )
    return Netloc(host, port)
class FireFoxWebDriverError(WebDriverException):
    """``WebDriverException`` subclass for this module's Firefox driver.

    It adds no behaviour of its own and is not raised by the code visible in
    this module.
    """
class WebDriver(Firefox):
    """Firefox driver preconfigured from a handful of keyword options.

    The constructor builds a Firefox profile/options pair (proxy, user agent,
    image loading, headless mode, extra arguments), starts the browser and
    applies the window size and timeouts. Instances can be used as context
    managers; leaving the ``with`` block quits the browser.
    """

    def __init__(self, load_images=True, user_agent=None, proxy=None,
                 headless=True, timeout=60, log_path=None,
                 window_size=(1024, 800), executable_path=None,
                 custom_argument=None, **kwargs):
        """Configure and start a Firefox session.

        Args:
            load_images: Whether images are loaded. ``False`` blocks all images.
            user_agent: User-agent string, or a zero-argument callable
                returning one. Falls back to ``DEFAULT_USERAGENT`` when falsy.
            proxy: Proxies mapping such as
                ``{"https": "socks5://xxx.xxx.xxx.xxx:xxxx"}``, or a
                zero-argument callable returning one. Configured as a SOCKS
                proxy.
            headless: Whether to start the browser in headless mode.
            timeout: Page-load and script timeout, in seconds.
            log_path: Path of the geckodriver service log file. Defaults to
                ``SERVICE_LOG_PATH``.
            window_size: ``(width, height)`` of the browser window; a falsy
                value leaves the window size untouched.
            executable_path: Driver executable path forwarded to ``Firefox``;
                when falsy the base class default is used.
            custom_argument: Iterable of extra command-line arguments added to
                the Firefox options.
            **kwargs: Additional keyword arguments forwarded to ``Firefox``.
        """
        self._load_images = load_images
        self._user_agent = user_agent or DEFAULT_USERAGENT
        self._proxy = proxy
        self._headless = headless
        self._timeout = timeout
        self._window_size = window_size
        self._executable_path = executable_path
        self._custom_argument = custom_argument
        self._service_log_path = log_path or str(SERVICE_LOG_PATH)

        profile = webdriver.FirefoxProfile()
        options = webdriver.FirefoxOptions()
        # DesiredCapabilities.FIREFOX is a shared class-level dict; work on a
        # copy so nothing done with it here can leak into other sessions.
        capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()

        profile.set_preference("dom.webdriver.enabled", False)
        profile.set_preference('useAutomationExtension', False)
        # Optional fingerprint protection, currently disabled:
        # profile.set_preference('privacy.resistFingerprinting', True)

        if self._proxy:
            proxy = self._proxy() if callable(self._proxy) else self._proxy
            host, port = netloc(proxy)
            # network.proxy.type: 0 = no proxy, 1 = manual proxy configuration.
            profile.set_preference('network.proxy.type', 1)
            profile.set_preference('network.proxy.socks', host)
            profile.set_preference('network.proxy.socks_port', int(port))

        if self._user_agent:
            profile.set_preference(
                "general.useragent.override",
                self._user_agent() if callable(self._user_agent) else self._user_agent,
            )

        if not self._load_images:
            # permissions.default.image:
            #   1 = load all images regardless of origin (default)
            #   2 = block all images
            #   3 = block third-party images
            profile.set_preference("permissions.default.image", 2)

        profile.update_preferences()

        if self._headless:
            options.add_argument("--headless")
            options.add_argument("--disable-gpu")

        if self._custom_argument:
            for argument in self._custom_argument:
                options.add_argument(argument)

        # Make sure the log directory exists before the driver service tries
        # to open the log file (the default "logs/" directory may be missing).
        Path(self._service_log_path).parent.mkdir(parents=True, exist_ok=True)

        init_kwargs = {
            "service_log_path": self._service_log_path,
            "capabilities": capabilities,
            "options": options,
            "firefox_profile": profile,
        }
        # Only pass executable_path when given so the base class default applies.
        if self._executable_path:
            init_kwargs["executable_path"] = self._executable_path
        super().__init__(**init_kwargs, **kwargs)

        try:
            if self._window_size:
                self.set_window_size(*self._window_size)
            self.set_page_load_timeout(self._timeout)
            self.set_script_timeout(self._timeout)
        except Exception:
            # The browser is already running but the caller will never receive
            # this instance, so shut it down here instead of leaking it.
            self.quit()
            raise

    def __enter__(self):
        """Return the driver itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Log any exception from the ``with`` block, then quit the browser.

        Returns:
            Always ``True``, so an exception raised inside the ``with`` block
            is suppressed after being logged.

        NOTE(review): this also suppresses ``KeyboardInterrupt`` and
        ``SystemExit``; kept as-is because callers may rely on it.
        """
        if exc_val:
            logger.exception(f'{self.__class__.__name__} <> {exc_type.__name__}: {exc_val}')
        self.quit()
        print("关闭浏览器")
        return True

    @property
    def cookies(self):
        """Return the current session cookies as a ``{name: value}`` dict."""
        return {cookie["name"]: cookie["value"] for cookie in self.get_cookies()}

    @cookies.setter
    def cookies(self, val: dict):
        """Add one cookie per item of ``val``.

        Args:
            val: Mapping of cookie names to values,
                e.g. ``{"key": "value", "key2": "value2"}``.
        """
        for key, value in val.items():
            self.add_cookie({"name": key, "value": value})
|