|
@@ -319,18 +319,27 @@ class BaseBusinessDetailSpider(Spider):
|
|
|
)
|
|
|
|
|
|
def failed_request(self, request, response):
    """Report a task whose request/parse failures exceeded the retry limit.

    Builds a failure record from the task payload in ``request.list_info``
    plus the failure details, then yields a callback that pushes that
    record to ``setting.TASK_CRAWL_STATE`` through ``self._rabbitmq``.

    Args:
        request: The failed request. ``list_info`` is read as the task
            payload (a mapping); the optional ``error_msg`` attribute is
            recorded as the failure reason.
        response: The last response. Its ``status_code`` is recorded, or
            ``-1`` when the attribute is missing (e.g. ``response`` is None).

    Yields:
        A zero-argument callable that publishes the failure record.
    """
    # Work on a copy so the caller's task payload is not mutated by pop().
    task_info = dict(request.list_info)
    failed_times = int(task_info.pop("failed_times", 0)) + 1

    # Merge with a dict literal instead of dict(state=3, ..., **task_info):
    # the keyword form raises TypeError as soon as the payload already holds
    # one of these keys (or any non-string key). The failure fields are
    # listed last so they take precedence over stale payload values.
    failed_items = {
        **task_info,
        "state": 3,  # crawl state of the pending task: 3 = crawl failed
        "failed_times": failed_times,
        "reason": getattr(request, "error_msg", ""),
        "status_code": getattr(response, "status_code", -1),
        "create_at": tools.ensure_int64(tools.get_current_timestamp()),
    }

    # Fall back to the queue derived from this spider's redis key when the
    # task payload does not say which queue it came from.
    if 'queue_name' not in failed_items:
        failed_items['queue_name'] = setting.TAB_ITEMS.format(
            redis_key=self._redis_key.replace('_detailc', '')
        )

    def callback():
        # Push the failed-task record to the crawl-state queue.
        self._rabbitmq.add(setting.TASK_CRAWL_STATE, failed_items)

    yield callback
|
|
|
|
|
|
def consume_tasks(self, limit=None, auto_ack=True):
|
|
|
"""
|