Browse Source

update:更新webdriver驱动配置

dongzhaorui 2 years ago
parent
commit
d46ca990d9

+ 2 - 2
zgztb_cookie/Dockerfile

@@ -51,9 +51,9 @@ RUN wget https://baibai.ink:88/selenium/firefox/geckodriver/0.31.0/geckodriver &
 # 指定工作目录
 WORKDIR /mnt
 
-# 当前目录下 源文件 复制到容器路径 /app
+# 当前目录下 源文件 复制到 工作目录
 COPY . .
 
-# 安装python项目依赖 和 node 项目依赖
+# 安装 python 项目依赖和 node 项目依赖
 RUN pip3 install -r requirements.txt
 RUN npm install

+ 28 - 27
zgztb_cookie/FworkSpider/feapder/utils/webdriver.py

@@ -94,6 +94,8 @@ class WebDriver(RemoteWebDriver):
         # Set the script timeout from self._timeout
         self.driver.set_script_timeout(self._timeout)
 
+        self._is_remote = not self._usages_local_driver
+
     def __enter__(self):
         return self
 
@@ -104,6 +106,12 @@ class WebDriver(RemoteWebDriver):
         self.get_driver().quit()
         return False
 
+    def __getattr__(self, name):
+        if self.driver:
+            return getattr(self.driver, name)
+        else:
+            raise AttributeError
+
     def get_driver(self):
         return self.driver
 
@@ -112,6 +120,7 @@ class WebDriver(RemoteWebDriver):
         firefox_options = webdriver.FirefoxOptions()
         firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX
         firefox_profile.set_preference("dom.webdriver.enabled", False)
+
         if self._proxy:
             proxy = self._proxy() if callable(self._proxy) else self._proxy
             proxy = proxy.replace("socks5://", "")
@@ -162,16 +171,14 @@ class WebDriver(RemoteWebDriver):
         return driver
 
     def remote_firefox_driver(self):
-        firefox_capabilities = {
-            "browserName": "firefox",
-            "platform": "ANY",
-            "version": self._version,
-            "javascriptEnabled": True,
-            "marionette": False,
-        }
         firefox_options = webdriver.FirefoxOptions()
+        desired_capabilities = firefox_options.to_capabilities()
         firefox_options.add_argument("--disable-gpu")
         firefox_options.set_preference("dom.webdriver.enabled", False)
+
+        if self._version:
+            desired_capabilities['version'] = self._version
+
         if self._proxy:
             proxy = self._proxy() if callable(self._proxy) else self._proxy
             proxy = proxy.replace("socks5://", "")
@@ -180,7 +187,6 @@ class WebDriver(RemoteWebDriver):
             firefox_options.set_preference('network.proxy.type', 1)  # 不使用代理:0, 使用代理:1
             firefox_options.set_preference('network.proxy.socks', ip)
             firefox_options.set_preference('network.proxy.socks_port', int(port))
-            # firefox_capabilities["marionette"] = True  # http代理的使用
 
         if self._user_agent:
             firefox_options.set_preference(
@@ -198,7 +204,7 @@ class WebDriver(RemoteWebDriver):
         executor = FirefoxRemoteConnection(remote_server_addr=self._server_addr)
         browser = webdriver.Remote(
             command_executor=executor,
-            desired_capabilities=firefox_capabilities,
+            desired_capabilities=desired_capabilities,
             options=firefox_options
         )
 
@@ -213,24 +219,21 @@ class WebDriver(RemoteWebDriver):
         return self.remote_firefox_driver()
 
     def remote_chrome_driver(self):
-        chrome_capabilities = {
-            "browserName": "chrome",
-            "platform": "ANY",
-            "version": self._version,
-            "javascriptEnabled": True,
-        }
         chrome_options = webdriver.ChromeOptions()
-
+        desired_capabilities = chrome_options.to_capabilities()
         # 此步骤很重要,设置为开发者模式,防止被各大网站识别出来使用了Selenium
         chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
         chrome_options.add_experimental_option("useAutomationExtension", False)
         chrome_options.add_argument('--disable-blink-features=AutomationControlled')
         # docker 里运行需要
-        chrome_options.add_argument("--no-sandbox")
-        chrome_options.add_argument("--disable-gpu")
+        chrome_options.add_argument('--no-sandbox')
+        chrome_options.add_argument('--disable-gpu')
         chrome_options.add_argument('--disable-extensions')
         chrome_options.add_argument('--disable-dev-shm-usage')
 
+        if self._version:
+            desired_capabilities['version'] = self._version
+
         if self._proxy:
             chrome_options.add_argument(
                 "--proxy-server={}".format(
@@ -266,7 +269,7 @@ class WebDriver(RemoteWebDriver):
             command_executor=ChromeRemoteConnection(
                 remote_server_addr=self._server_addr,
                 keep_alive=True),
-            desired_capabilities=chrome_capabilities,
+            desired_capabilities=desired_capabilities,
             options=chrome_options
         )
 
@@ -277,7 +280,7 @@ class WebDriver(RemoteWebDriver):
                 'cmd': 'Page.addScriptToEvaluateOnNewDocument',
                 'params': {'source': js}
             }
-            res = browser.execute("executeCdpCommand", params)['value']
+            response = browser.execute("executeCdpCommand", params)['value']
         return browser
 
     def local_chrome_driver(self):
@@ -371,14 +374,12 @@ class WebDriver(RemoteWebDriver):
         for key, value in val.items():
             self.driver.add_cookie({"name": key, "value": value})
 
-    def __getattr__(self, name):
-        if self.driver:
-            return getattr(self.driver, name)
-        else:
-            raise AttributeError
-
     def quit(self):
-        self.get_driver().quit()
+        try:
+            self.get_driver().quit()
+        except Exception:
+            # Best-effort shutdown: ignore any error raised while quitting the driver
+            pass
 
     # def __del__(self):
     #     if self.driver:

+ 4 - 6
zgztb_cookie/FworkSpider/setting.py

@@ -22,8 +22,8 @@ REDISDB_SERVICE_NAME = "quchoong"
 ITEM_PIPELINES = [
     "feapder.pipelines.swordfish.mongo_pipeline.MongoPipeline",
 ]
-EXPORT_DATA_MAX_FAILED_TIMES = 5 # 导出数据时最大的失败次数,包括保存和更新,超过这个次数报警
-EXPORT_DATA_MAX_RETRY_TIMES = 5 # 导出数据时最大的重试次数,包括保存和更新,超过这个次数则放弃重试
+EXPORT_DATA_MAX_FAILED_TIMES = 5  # 导出数据时最大的失败次数,包括保存和更新,超过这个次数报警
+EXPORT_DATA_MAX_RETRY_TIMES = 5  # 导出数据时最大的重试次数,包括保存和更新,超过这个次数则放弃重试
 
 REDIS_KEY = "fwork"
 
@@ -38,7 +38,7 @@ WEBDRIVER = dict(
     pool_size=1,  # 浏览器的数量
     load_images=False,  # 是否加载图片
     user_agent=None,  # 字符串 或 无参函数,返回值为user_agent
-    headless=False,  # 是否为无头浏览器
+    headless=True,  # 是否为无头浏览器
     usages_local_driver=True,  # 是否使用本地驱动,默认启动本地驱动
     proxy=None,  # xxx.xxx.xx.xxx:xxxx 或 无参函数,返回值为代理地址
     driver_type="FIREFOX",  # CHROME、FIREFOX
@@ -67,10 +67,8 @@ LOG_NAME = os.path.basename(os.getcwd())
 LOG_PATH = "logs/%s.log" %(LOG_NAME)  # log存储路径
 LOG_LEVEL = "INFO"
 LOG_COLOR = True  # 是否带有颜色
-LOG_IS_WRITE_TO_CONSOLE = False # 是否打印到控制台
+LOG_IS_WRITE_TO_CONSOLE = False  # 是否打印到控制台
 LOG_IS_WRITE_TO_FILE = True  # 是否写文件
-LOG_IS_SEND_TO_LOGSTASH = True
-
 LOG_MODE = "w"  # 写文件的模式
 LOG_MAX_BYTES = 10 * 1024 * 1024  # 每个日志文件的最大字节数
 LOG_BACKUP_COUNT = 20  # 日志文件保留数量