Browse Source

错误详情新增采集队列属性

dongzhaorui 1 year ago
parent
commit
456b3944a5
1 changed file with 18 additions and 9 deletions
  1. +18 -9
      FworkSpider/feapder/core/spiders/spider.py

+ 18 - 9
FworkSpider/feapder/core/spiders/spider.py

@@ -319,18 +319,27 @@ class BaseBusinessDetailSpider(Spider):
         )
 
     def failed_request(self, request, response):
-        """请求、解析错误次数超过上限后,添加failed字段,将错误信息重新保存至数据库"""
-        failed_info = request.list_info
-        failed_times = int(failed_info.pop("failed_times", 0)) + 1
-        failed_item = Item(
-            **failed_info,
+        """请求、解析错误次数超过上限后,记录错误详情信息"""
+        failed_request_info = request.list_info
+        failed_times = int(failed_request_info.pop("failed_times", 0)) + 1
+        failed_items = dict(
+            state=3,  # 待采集任务失败采集状态[3=采集失败]
             failed_times=failed_times,
-            err_reason=getattr(request, "error_msg", ""),
+            reason=getattr(request, "error_msg", ""),
             status_code=getattr(response, "status_code", -1),
-            create_at=tools.ensure_int64(tools.get_current_timestamp())
+            create_at=tools.ensure_int64(tools.get_current_timestamp()),
+            **failed_request_info,
         )
-        failed_item.table_name = setting.TASK_FAILED
-        yield failed_item
+        if 'queue_name' not in failed_items:
+            failed_items['queue_name'] = setting.TAB_ITEMS.format(
+                redis_key=self._redis_key.replace('_detailc', '')
+            )
+
+        def callback():
+            # 推送采集失败任务信息
+            self._rabbitmq.add(setting.TASK_CRAWL_STATE, failed_items)
+
+        yield callback
 
     def consume_tasks(self, limit=None, auto_ack=True):
         """