Sfoglia il codice sorgente

删除批次爬虫相关代码

dongzhaorui 1 anno fa
parent
commit
04b281678f
1 ha cambiato i file con 2 aggiunte e 19 eliminazioni
  1. +2 −19
      FworkSpider/feapder/core/spiders/spider.py

+ 2 - 19
FworkSpider/feapder/core/spiders/spider.py

@@ -24,7 +24,7 @@ CONSOLE_PIPELINE_PATH = "feapder.pipelines.console_pipeline.ConsolePipeline"
 
 class Spider(
     BaseParser, Scheduler
-):  # threading 中有name函数, 必须先继承BaseParser 否则其内部的name会被Schedule的基类threading.Thread的name覆盖
+):  # threading 中有name函数, 必须先继承BaseParser 否则其内部的name会被Schedule的基类覆盖threading.Thread的name
     """
     @summary: 为了简化搭建爬虫
     ---------
@@ -33,32 +33,24 @@ class Spider(
     def __init__(
         self,
         redis_key=None,
-        min_task_count=1,
         check_task_interval=5,
         thread_count=None,
         begin_callback=None,
         end_callback=None,
-        delete_keys=(),
         keep_alive=None,
         auto_start_requests=None,
-        batch_interval=0,
-        wait_lock=True,
         **kwargs
     ):
         """
         @summary: 爬虫
         ---------
         @param redis_key: 任务等数据存放在redis中的key前缀
-        @param min_task_count: 任务队列中最少任务数, 少于这个数量才会添加任务,默认1。start_monitor_task 模式下生效
         @param check_task_interval: 检查是否还有任务的时间间隔;默认5秒
         @param thread_count: 线程数,默认为配置文件中的线程数
         @param begin_callback: 爬虫开始回调函数
         @param end_callback: 爬虫结束回调函数
-        @param delete_keys: 爬虫启动时删除的key,类型: 元组/bool/string。 支持正则; 常用于清空任务队列,否则重启时会断点续爬
         @param keep_alive: 爬虫是否常驻
         @param auto_start_requests: 爬虫是否自动添加任务
-        @param batch_interval: 抓取时间间隔 默认为0 天为单位 多次启动时,只有当前时间与第一次抓取结束的时间间隔大于指定的时间间隔时,爬虫才启动
-        @param wait_lock: 下发任务时否等待锁,若不等待锁,可能会存在多进程同时在下发一样的任务,因此分布式环境下请将该值设置True
         ---------
         @result:
         """
@@ -67,17 +59,12 @@ class Spider(
             thread_count=thread_count,
             begin_callback=begin_callback,
             end_callback=end_callback,
-            delete_keys=delete_keys,
             keep_alive=keep_alive,
             auto_start_requests=auto_start_requests,
-            batch_interval=batch_interval,
-            wait_lock=wait_lock,
             **kwargs
         )
 
-        self._min_task_count = min_task_count
         self._check_task_interval = check_task_interval
-
         self._is_distributed_task = False
         self._is_show_not_task = False
 
@@ -308,7 +295,7 @@ class BusinessBaseDetailSpider(Spider):
         ITEM_FILTER_ENABLE=False
     )
 
-    err_coll_name = "listdata_err"
+    err_coll_name = "listdata_err"  # 详情采集失败时存放的详情任务数据的表
     _to_db = None
 
     def __init__(
@@ -320,8 +307,6 @@ class BusinessBaseDetailSpider(Spider):
             delete_keys=(),
             keep_alive=None,
             auto_start_requests=None,
-            batch_interval=0,
-            wait_lock=True,
             **kwargs
     ):
         self.__class__.__custom_setting__.update(
@@ -335,8 +320,6 @@ class BusinessBaseDetailSpider(Spider):
             delete_keys=delete_keys,
             keep_alive=keep_alive,
             auto_start_requests=auto_start_requests,
-            batch_interval=batch_interval,
-            wait_lock=wait_lock,
             **kwargs
         )