Explorar el Código

新增drissionpage下载器

dongzhaorui hace 2 meses
padre
commit
15f96d5f08

+ 9 - 5
FworkSpider/feapder/core/parser_control.py

@@ -388,8 +388,8 @@ class PaserControl(threading.Thread):
 
                 finally:
                     # 释放浏览器
-                    if response and hasattr(response, "browser"):
-                        request._webdriver_pool.put(response.browser)
+                    if response and getattr(response, "browser", None):
+                        request.render_downloader.put_back(response.browser)
 
                     # 发布心跳
                     self.publish_heartbeat(parser, request, response, **counter)
@@ -742,8 +742,8 @@ class AirSpiderParserControl(PaserControl):
 
                 finally:
                     # 释放浏览器
-                    if response and hasattr(response, "browser"):
-                        request._webdriver_pool.put(response.browser)
+                    if response and getattr(response, "browser", None):
+                        request.render_downloader.put_back(response.browser)
 
                 break
 
@@ -1001,6 +1001,10 @@ class JySpiderParserControl(PaserControl):
                                     % (parser.name, "failed_request")
                                 )
 
+                            for result in results:
+                                if isinstance(result, Item):
+                                    self._item_buffer.put_item(result)
+
                             log.info(
                                 """
                                 任务超过最大重试次数,丢弃
@@ -1046,7 +1050,7 @@ class JySpiderParserControl(PaserControl):
                 finally:
                     # 释放浏览器
                     if response and getattr(response, "browser", None):
-                        request._webdriver_pool.put(response.browser)
+                        request.render_downloader.put_back(response.browser)
 
                     self.publish_heartbeat(parser, request, response, **counter)
 

+ 2 - 2
FworkSpider/feapder/core/scheduler.py

@@ -394,8 +394,8 @@ class Scheduler(threading.Thread):
             parser.end_callback()  # 调用结束回调函数
 
         if not self._keep_alive:
-            if Request.webdriver_pool:
-                Request.webdriver_pool.close()  # 关闭 webdriver 管理池
+            # 关闭webdirver
+            Request.render_downloader and Request.render_downloader.close_all()
 
             metrics.close()  # 关闭打点
         else:

+ 1 - 2
FworkSpider/feapder/core/spiders/air_spider.py

@@ -101,8 +101,7 @@ class AirSpider(BaseParser, Thread):
                     self._item_buffer.stop()
 
                     # 关闭webdirver
-                    if Request.webdriver_pool:
-                        Request.webdriver_pool.close()
+                    Request.render_downloader and Request.render_downloader.close_all()
 
                     log.info("无任务,爬虫结束")
                     break

+ 4 - 4
FworkSpider/feapder/core/spiders/spider.py

@@ -137,8 +137,7 @@ class Spider(BaseParser, Thread):
                     self._heartbeat_buffer.stop()  # 关闭 heartbeat_buffer
 
                     # 关闭 webdriver
-                    if Request.webdriver_pool:
-                        Request.webdriver_pool.close()
+                    Request.render_downloader and Request.render_downloader.close_all()
 
                     log.info("无任务,爬虫结束")
                     break
@@ -214,6 +213,7 @@ class BaseBusinessDetailSpider(Spider):
         yield failed_item
 
     def get_tasks(self, limit=None, **kwargs):
+        show_log = kwargs.pop("show_log", False)
         timeout = kwargs.pop("timeout", 10)
         queue = setting.TAB_ITEMS.format(redis_key=self._redis_key.replace("_detailc", ""))
 
@@ -221,8 +221,8 @@ class BaseBusinessDetailSpider(Spider):
         url = f"{setting.JY_TASK_URL}/tasks/fd?qn={queue}&limit={limit}"
         headers = {"Authorization": self.task_api_auth_token}
         params = dict(headers=headers, timeout=timeout, proxies=False)
-        response = Request(method="GET", url=url, **params).get_response()
-        ret = response.json["task"]
+        r = Request(method="GET", url=url, **params).get_response(show_log=show_log)
+        ret = r.json["task"]
         self.tasks_dict = {
             "token": self.task_api_auth_token,
             "data": {t["pyuuid"]: {"tid": t["tid"], "queue": queue} for t in ret}

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 0 - 0
FworkSpider/feapder/utils/js/intercept.js


La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 1 - 1
FworkSpider/feapder/utils/js/stealth.min.js


Algunos archivos no se muestran porque este cambio modifica demasiados archivos