import datetime
from collections import namedtuple
from pathlib import Path

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver import Firefox

from common.log import logger

# Directory three levels above this file; the geckodriver log path is
# resolved relative to it.
_absolute = Path(__file__).absolute().parent.parent.parent
# Date stamp taken once, at import time, and embedded in the log file name.
_date = datetime.datetime.now().strftime('%Y-%m-%d')

SERVICE_LOG_PATH = (_absolute / f'logs/geckodriver-{_date}.log').resolve()
DEFAULT_USERAGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:96.0) Gecko/20100101 Firefox/96.0"

Netloc = namedtuple('Netloc', ['host', 'port'])


def netloc(proxies: dict) -> Netloc:
    """Extract the SOCKS proxy host and port from a proxies mapping.

    Args:
        proxies: Mapping whose ``"https"`` entry is a proxy address such as
            ``"socks5://127.0.0.1:1080"``. A bare ``"host:port"`` and other
            ``socks*://`` schemes (e.g. ``socks5h://``) are accepted too.

    Returns:
        ``Netloc(host, port)``; ``port`` is returned as a string of digits.

    Raises:
        KeyError: If ``proxies`` has no ``"https"`` entry.
        ValueError: If the address is not a SOCKS ``host:port`` pair.
    """
    address = proxies["https"]
    scheme, separator, remainder = address.partition("://")
    if not separator:
        # No scheme present: treat the whole value as "host:port".
        remainder = address
    elif not scheme.lower().startswith("socks"):
        # The caller configures a SOCKS proxy, so other schemes are rejected
        # rather than silently misconfigured.
        raise ValueError("proxy address must look like 'socks5://host:port'")

    pieces = remainder.rstrip("/").split(":")
    if (
        len(pieces) != 2
        or not pieces[0]
        or "@" in pieces[0]
        or not pieces[1].isdigit()
    ):
        raise ValueError("proxy address must look like 'socks5://host:port'")
    return Netloc(pieces[0], pieces[1])


class FireFoxWebDriverError(WebDriverException):
    """WebDriverException subclass declared for this Firefox wrapper."""


class WebDriver(Firefox):
    """Firefox driver configured from keyword options.

    Usable as a context manager: leaving the ``with`` block logs any
    exception raised inside it, quits the browser and suppresses the
    exception.
    """

    def __init__(self, load_images=True, user_agent=None, proxy=None, headless=True,
                 timeout=60, log_path=None, window_size=(1024, 800),
                 executable_path=None, custom_argument=None, **kwargs):
        """Build the Firefox profile/options and start the browser.

        Args:
            load_images: Whether images are loaded.
            user_agent: User-agent string, or a zero-argument callable that
                returns one. Falls back to ``DEFAULT_USERAGENT`` when falsy.
            proxy: Mapping such as ``{"https": "socks5://host:port"}``, or a
                zero-argument callable returning such a mapping.
            headless: Whether to run the browser in headless mode.
            timeout: Value applied to both the page-load timeout and the
                script timeout.
            log_path: Path of the geckodriver service log file. Defaults to
                ``SERVICE_LOG_PATH``.
            window_size: ``(width, height)`` of the browser window; a falsy
                value leaves the window size untouched.
            executable_path: Forwarded to the Firefox driver as
                ``executable_path`` when truthy; otherwise the driver's own
                default is used.
            custom_argument: Iterable of extra command-line arguments added
                to the Firefox options.
            **kwargs: Extra keyword arguments forwarded to ``Firefox``.
        """
        self._load_images = load_images
        self._user_agent = user_agent or DEFAULT_USERAGENT
        self._proxy = proxy
        self._headless = headless
        self._timeout = timeout
        self._window_size = window_size
        self._executable_path = executable_path
        self._custom_argument = custom_argument
        self._service_log_path = log_path or str(SERVICE_LOG_PATH)

        profile = self._build_profile()
        options = self._build_options()
        # Copy so the shared class-level capabilities dict is never mutated.
        capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()

        # Only pass executable_path when one was supplied, so the driver's
        # own default applies otherwise.
        launch_kwargs = dict(kwargs)
        if self._executable_path:
            launch_kwargs["executable_path"] = self._executable_path

        super(WebDriver, self).__init__(
            service_log_path=self._service_log_path,
            capabilities=capabilities,
            options=options,
            firefox_profile=profile,
            **launch_kwargs
        )

        try:
            if self._window_size:
                self.set_window_size(*self._window_size)
            self.set_page_load_timeout(self._timeout)
            self.set_script_timeout(self._timeout)
        except Exception:
            # The browser is already running at this point; shut it down
            # instead of leaking the process, then surface the error.
            self.quit()
            raise

    def _build_profile(self):
        """Create the Firefox profile holding automation, proxy, UA and image prefs."""
        profile = webdriver.FirefoxProfile()
        profile.set_preference("dom.webdriver.enabled", False)
        profile.set_preference('useAutomationExtension', False)
        # NOTE: 'privacy.resistFingerprinting' (fingerprinting protection)
        # is not enabled here.

        if self._proxy:
            proxies = self._proxy() if callable(self._proxy) else self._proxy
            host, port = netloc(proxies)
            # network.proxy.type: 0 = no proxy, 1 = manual proxy settings
            # (used here for the SOCKS proxy).
            profile.set_preference('network.proxy.type', 1)
            profile.set_preference('network.proxy.socks', host)
            profile.set_preference('network.proxy.socks_port', int(port))

        if self._user_agent:
            agent = self._user_agent() if callable(self._user_agent) else self._user_agent
            profile.set_preference("general.useragent.override", agent)

        if not self._load_images:
            # permissions.default.image:
            #   1 = allow all images regardless of origin (default)
            #   2 = block all images
            #   3 = block third-party images
            profile.set_preference("permissions.default.image", 2)

        profile.update_preferences()
        return profile

    def _build_options(self):
        """Create the Firefox options holding headless and custom arguments."""
        options = webdriver.FirefoxOptions()
        if self._headless:
            options.add_argument("--headless")
            options.add_argument("--disable-gpu")
        if self._custom_argument:
            for argument in self._custom_argument:
                options.add_argument(argument)
        return options

    def __enter__(self):
        """Return the driver itself for use inside a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Log any exception from the ``with`` body, then quit the browser.

        Returns:
            Always ``True``, so an exception raised inside the ``with`` block
            is logged and suppressed rather than propagated.
        """
        if exc_type is not None:
            logger.exception(f'{self.__class__.__name__} <> {exc_type.__name__}: {exc_val}')

        self.quit()
        print("关闭浏览器")
        return True

    @property
    def cookies(self):
        """Return the current browser cookies as a ``{name: value}`` dict."""
        return {cookie["name"]: cookie["value"] for cookie in self.get_cookies()}

    @cookies.setter
    def cookies(self, val: dict):
        """Add one browser cookie per item of ``val``.

        Args:
            val: Mapping of cookie names to values, e.g.
                ``{"key": "value", "key2": "value2"}``.
        """
        for name, value in val.items():
            self.add_cookie({"name": name, "value": value})