|
@@ -90,19 +90,15 @@ class Spider(BaseParser, Thread):
|
|
return True
|
|
return True
|
|
|
|
|
|
def register_task_api_token(self):
|
|
def register_task_api_token(self):
|
|
- if setting.REGISTER_TASK_TOKEN:
|
|
|
|
- # 任务中心释放采集任务控制标识
|
|
|
|
- self._item_buffer.release_task_enable = True
|
|
|
|
|
|
+ if self.task_api_auth_token is None:
|
|
|
|
+ token_url = f"{setting.JY_TASK_URL}/tasks/token"
|
|
|
|
+ data = {"username": "spider@py", "password": "123@qweA!"}
|
|
|
|
+ auth_params = dict(url=token_url, timeout=10, data=data, proxies=False)
|
|
|
|
+ response = Request(method="GET", **auth_params).get_response(show_log=False)
|
|
|
|
+ token = response.json["token"]
|
|
|
|
+ self.task_api_auth_token = token
|
|
|
|
|
|
- if self.task_api_auth_token is None:
|
|
|
|
- token_url = f"{setting.JY_TASK_URL}/tasks/token"
|
|
|
|
- data = {"username": "spider@py", "password": "123@qweA!"}
|
|
|
|
- auth_params = dict(url=token_url, timeout=10, data=data, proxies=False)
|
|
|
|
- response = Request(method="GET", **auth_params).get_response(show_log=False)
|
|
|
|
- token = response.json["token"]
|
|
|
|
- self.task_api_auth_token = token
|
|
|
|
-
|
|
|
|
- log.debug(f"register api token:{self.task_api_auth_token}")
|
|
|
|
|
|
+ log.debug(f"register api token:{self.task_api_auth_token}")
|
|
|
|
|
|
def run(self): # 调度控制流程起始
|
|
def run(self): # 调度控制流程起始
|
|
self.start_callback()
|
|
self.start_callback()
|
|
@@ -124,9 +120,8 @@ class Spider(BaseParser, Thread):
|
|
self.register_task_api_token()
|
|
self.register_task_api_token()
|
|
# 派发任务
|
|
# 派发任务
|
|
self.distribute_task()
|
|
self.distribute_task()
|
|
- # 已派发任务加入 item_buffer 缓存容器
|
|
|
|
- if self._item_buffer.release_task_enable:
|
|
|
|
- self._item_buffer.tasks_dict.update(self.tasks_dict)
|
|
|
|
|
|
+ # 派发任务加入 item_buffer 缓存容器(派发任务通过任务中心领取)
|
|
|
|
+ self._item_buffer.tasks_dict.update(self.tasks_dict)
|
|
|
|
|
|
while True:
|
|
while True:
|
|
try:
|
|
try:
|
|
@@ -150,9 +145,8 @@ class Spider(BaseParser, Thread):
|
|
|
|
|
|
tools.delay_time(1) # 1秒钟检查一次爬虫状态
|
|
tools.delay_time(1) # 1秒钟检查一次爬虫状态
|
|
|
|
|
|
- if self._item_buffer.release_task_enable:
|
|
|
|
- # 释放剩余未完成的任务
|
|
|
|
- self._item_buffer.release_tasks(self.tasks_dict, finished=False)
|
|
|
|
|
|
+ # 释放剩余未完成的任务
|
|
|
|
+ self._item_buffer.release_tasks(self.tasks_dict, finished=False)
|
|
|
|
|
|
self.end_callback()
|
|
self.end_callback()
|
|
self._started.clear() # 为了线程可重复start
|
|
self._started.clear() # 为了线程可重复start
|