|
@@ -144,6 +144,8 @@ class Scheduler(threading.Thread):
|
|
|
raise ValueError("类型错误,爬虫需继承feapder.BaseParser或feapder.BatchParser")
|
|
|
|
|
|
def _start(self):
|
|
|
+ self.spider_begin() # 启动爬虫 -- start_callback
|
|
|
+
|
|
|
# 将失败的item入库
|
|
|
if setting.RETRY_FAILED_ITEMS:
|
|
|
handle_failed_items = HandleFailedItems(
|
|
@@ -213,8 +215,6 @@ class Scheduler(threading.Thread):
|
|
|
tools.delay_time(1)
|
|
|
|
|
|
def __add_task(self):
|
|
|
- self.spider_begin() # 启动爬虫 -- start_requests
|
|
|
-
|
|
|
# 判断任务池中是否还有任务,若有接着抓取,若无则生产新任务
|
|
|
todo_task_count = self._collector.get_requests_count()
|
|
|
if todo_task_count:
|
|
@@ -382,8 +382,8 @@ class Scheduler(threading.Thread):
|
|
|
if self._begin_callback:
|
|
|
self._begin_callback()
|
|
|
|
|
|
+ parameter = self.get_argvs()
|
|
|
for parser in self._parsers:
|
|
|
- parameter = self.get_argvs()
|
|
|
parser.platform_next_page = parameter.next_page
|
|
|
parser.platform_max_page = parameter.max_page
|
|
|
parser.start_callback()
|