Forráskód Böngészése

详情爬虫新增消费队列获取任务

dongzhaorui 1 éve
szülő
commit
ad3a96a70e
1 módosított fájl, 17 hozzáadás és 7 törlés
  1. 17 7
      FworkSpider/feapder/core/spiders/spider.py

+ 17 - 7
FworkSpider/feapder/core/spiders/spider.py

@@ -11,6 +11,7 @@ Created on 2020/4/22 12:05 AM
 import warnings
 from collections import Iterable
 
+import feapder.setting as setting
 import feapder.utils.tools as tools
 from feapder.core.base_parser import BaseParser
 from feapder.core.scheduler import Scheduler
@@ -224,7 +225,7 @@ class DebugSpider(Spider):
             tools.delay_time(1)  # 1秒钟检查一次爬虫状态
 
 
-class BusinessBaseListSpider(Spider):
+class BaseBusinessListSpider(Spider):
     """列表页采集业务基类"""
 
     __business_type__ = "List"
@@ -287,7 +288,7 @@ class BusinessBaseListSpider(Spider):
         cls.__extract_count__ += 1
 
 
-class BusinessBaseDetailSpider(Spider):
+class BaseBusinessDetailSpider(Spider):
     """详情页采集业务基类"""
 
     __business_type__ = "Detail"
@@ -312,7 +313,8 @@ class BusinessBaseDetailSpider(Spider):
         self.__class__.__custom_setting__.update(
             self.__class__.__business_setting__
         )
-        super(BusinessBaseDetailSpider, self).__init__(
+        redis_key = f'{redis_key}_detailc'
+        super(BaseBusinessDetailSpider, self).__init__(
             redis_key=redis_key,
             thread_count=thread_count,
             begin_callback=begin_callback,
@@ -363,14 +365,22 @@ class BusinessBaseDetailSpider(Spider):
             self._to_db = MongoDB()
         return self._to_db
 
+    def consume_tasks(self, limit=100):
+        queue_name = setting.TAB_ITEMS.format(
+            redis_key=self._redis_key.replace('_detailc', '')
+        )
+        sizes = limit or setting.COLLECTOR_TASK_COUNT
+        messages = self._rabbitmq.get(queue_name, sizes, True, to_str=False)
+        return [message.body.to_dict for message in messages]
+
 
-class BiddingListSpider(BusinessBaseListSpider):
+class BiddingListSpider(BaseBusinessListSpider):
     """标讯列表页采集业务类"""
 
     __business_type__ = "BiddingList"
 
 
-class BiddingDetailSpider(BusinessBaseDetailSpider):
+class BiddingDetailSpider(BaseBusinessDetailSpider):
     """标讯详情页采集业务类"""
 
     __business_type__ = "BiddingDetail"
@@ -383,13 +393,13 @@ class BiddingDetailSpider(BusinessBaseDetailSpider):
         yield failed_item
 
 
-class PlanToBuildListSpider(BusinessBaseListSpider):
+class PlanToBuildListSpider(BaseBusinessListSpider):
     """拟建列表页采集业务类"""
 
     __business_type__ = "PlanToBuildList"
 
 
-class PlanToBuildDetailSpider(BusinessBaseDetailSpider):
+class PlanToBuildDetailSpider(BaseBusinessDetailSpider):
     """拟建详情页采集业务类"""
 
     __business_type__ = "PlanToBuildDetail"