|
@@ -53,12 +53,15 @@ class Spider(BaseParser, Thread):
|
|
|
self.task_api_auth_token = None
|
|
|
|
|
|
def distribute_task(self):
    """Queue every request produced by ``start_requests`` into the memory DB.

    Each object yielded by ``self.start_requests()`` must be a ``Request``.
    A request whose ``parser_name`` is falsy is assigned ``self.name``
    before it is added to ``self._memory_db``.

    Raises:
        ValueError: if ``start_requests`` yields anything that is not a
            ``Request``. This is not caught here and propagates to the caller.

    An ``IOError`` raised while iterating or while adding a request is
    logged and suppressed. The loop is abandoned at that point, so any
    requests not yet yielded are not queued.
    """
    try:
        for request in self.start_requests():
            if not isinstance(request, Request):
                raise ValueError("仅支持 yield Request")

            # Fall back to this spider's own name when the request does not
            # name a parser explicitly.
            request.parser_name = request.parser_name or self.name
            self._memory_db.add(request)
    except IOError as exc:
        # Include the underlying error so the log shows why distribution
        # failed instead of only that it failed.
        log.error(f"distribute task failed: {exc!r}")
|
|
|
|
|
|
def all_thread_is_done(self):
|
|
|
for i in range(3):
|
|
@@ -211,12 +214,13 @@ class BaseBusinessDetailSpider(Spider):
|
|
|
yield failed_item
|
|
|
|
|
|
def get_tasks(self, limit=None, **kwargs):
|
|
|
+ timeout = kwargs.pop("timeout", 10)
|
|
|
queue = setting.TAB_ITEMS.format(redis_key=self._redis_key.replace("_detailc", ""))
|
|
|
|
|
|
# 获取任务
|
|
|
url = f"{setting.JY_TASK_URL}/tasks/fd?qn={queue}&limit={limit}"
|
|
|
headers = {"Authorization": self.task_api_auth_token}
|
|
|
- params = dict(headers=headers, timeout=10, proxies=False)
|
|
|
+ params = dict(headers=headers, timeout=timeout, proxies=False)
|
|
|
response = Request(method="GET", url=url, **params).get_response()
|
|
|
ret = response.json["task"]
|
|
|
self.tasks_dict = {
|