|
@@ -11,6 +11,7 @@ Created on 2020/4/22 12:05 AM
|
|
|
import warnings
|
|
|
from collections import Iterable
|
|
|
|
|
|
+import feapder.setting as setting
|
|
|
import feapder.utils.tools as tools
|
|
|
from feapder.core.base_parser import BaseParser
|
|
|
from feapder.core.scheduler import Scheduler
|
|
@@ -224,7 +225,7 @@ class DebugSpider(Spider):
|
|
|
tools.delay_time(1) # 1秒钟检查一次爬虫状态
|
|
|
|
|
|
|
|
|
-class BusinessBaseListSpider(Spider):
|
|
|
+class BaseBusinessListSpider(Spider):
|
|
|
"""列表页采集业务基类"""
|
|
|
|
|
|
__business_type__ = "List"
|
|
@@ -287,7 +288,7 @@ class BusinessBaseListSpider(Spider):
|
|
|
cls.__extract_count__ += 1
|
|
|
|
|
|
|
|
|
-class BusinessBaseDetailSpider(Spider):
|
|
|
+class BaseBusinessDetailSpider(Spider):
|
|
|
"""详情页采集业务基类"""
|
|
|
|
|
|
__business_type__ = "Detail"
|
|
@@ -312,7 +313,8 @@ class BusinessBaseDetailSpider(Spider):
|
|
|
self.__class__.__custom_setting__.update(
|
|
|
self.__class__.__business_setting__
|
|
|
)
|
|
|
- super(BusinessBaseDetailSpider, self).__init__(
|
|
|
+ redis_key = f'{redis_key}_detailc'
|
|
|
+ super(BaseBusinessDetailSpider, self).__init__(
|
|
|
redis_key=redis_key,
|
|
|
thread_count=thread_count,
|
|
|
begin_callback=begin_callback,
|
|
@@ -363,14 +365,22 @@ class BusinessBaseDetailSpider(Spider):
|
|
|
self._to_db = MongoDB()
|
|
|
return self._to_db
|
|
|
|
|
|
def consume_tasks(self, limit=100):
    """Fetch a batch of task messages from the queue named by ``setting.TAB_ITEMS``.

    The queue name is produced by formatting ``setting.TAB_ITEMS`` with this
    spider's redis key after removing the trailing ``_detailc`` suffix
    (the suffix this class appends to ``redis_key`` in ``__init__``).

    Args:
        limit: Number of messages to request. A falsy value (``None`` or
            ``0``) falls back to ``setting.COLLECTOR_TASK_COUNT``.

    Returns:
        list: ``message.body.to_dict`` for each message returned by
        ``self._rabbitmq.get``.
    """
    suffix = '_detailc'
    base_key = self._redis_key
    # Remove only the trailing suffix. str.replace() would also delete an
    # occurrence of '_detailc' inside the caller-supplied key and so build
    # the wrong queue name.
    if base_key.endswith(suffix):
        base_key = base_key[:-len(suffix)]

    queue_name = setting.TAB_ITEMS.format(redis_key=base_key)
    batch_size = limit or setting.COLLECTOR_TASK_COUNT
    messages = self._rabbitmq.get(queue_name, batch_size, True, to_str=False)
    # NOTE(review): to_dict is read without a call -- presumably a property
    # on the message body; confirm against the item class.
    return [message.body.to_dict for message in messages]
|
|
|
+
|
|
|
|
|
|
-class BiddingListSpider(BusinessBaseListSpider):
|
|
|
+class BiddingListSpider(BaseBusinessListSpider):
|
|
|
"""标讯列表页采集业务类"""
|
|
|
|
|
|
__business_type__ = "BiddingList"
|
|
|
|
|
|
|
|
|
-class BiddingDetailSpider(BusinessBaseDetailSpider):
|
|
|
+class BiddingDetailSpider(BaseBusinessDetailSpider):
|
|
|
"""标讯详情页采集业务类"""
|
|
|
|
|
|
__business_type__ = "BiddingDetail"
|
|
@@ -383,13 +393,13 @@ class BiddingDetailSpider(BusinessBaseDetailSpider):
|
|
|
yield failed_item
|
|
|
|
|
|
|
|
|
-class PlanToBuildListSpider(BusinessBaseListSpider):
|
|
|
+class PlanToBuildListSpider(BaseBusinessListSpider):
|
|
|
"""拟建列表页采集业务类"""
|
|
|
|
|
|
__business_type__ = "PlanToBuildList"
|
|
|
|
|
|
|
|
|
-class PlanToBuildDetailSpider(BusinessBaseDetailSpider):
|
|
|
+class PlanToBuildDetailSpider(BaseBusinessDetailSpider):
|
|
|
"""拟建详情页采集业务类"""
|
|
|
|
|
|
__business_type__ = "PlanToBuildDetail"
|