|
@@ -100,6 +100,11 @@ class Spider(
|
|
|
DebugSpider.__name__ = cls.__name__
|
|
|
return DebugSpider(*args, **kwargs)
|
|
|
|
|
|
+ @property
|
|
|
+ def is_mix(self):
|
|
|
+ """Spider collection mode: True = mixed collection (list page + detail page); False = separate collection (list page, detail page)."""
|
|
|
+ return False
|
|
|
+
|
|
|
|
|
|
class DebugSpider(Spider):
|
|
|
"""
|
|
@@ -221,7 +226,7 @@ class DebugSpider(Spider):
|
|
|
|
|
|
|
|
|
class BaseBusinessListSpider(Spider):
|
|
|
- """列表页采集业务基类"""
|
|
|
+ """列表页采集基础爬虫"""
|
|
|
|
|
|
__business_type__ = "List"
|
|
|
__extract_count__ = 0
|
|
@@ -240,7 +245,7 @@ class BaseBusinessListSpider(Spider):
|
|
|
|
|
|
|
|
|
class BaseBusinessDetailSpider(Spider):
|
|
|
- """详情页采集业务基类"""
|
|
|
+ """详情页采集基础爬虫"""
|
|
|
|
|
|
__business_type__ = "Detail"
|
|
|
__business_setting__ = dict(
|
|
@@ -317,25 +322,32 @@ class BaseBusinessDetailSpider(Spider):
|
|
|
return task_lst
|
|
|
|
|
|
|
|
|
+class MixBusinessSpider(BaseBusinessListSpider):
|
|
|
+ """Base spider for mixed collection (list page + detail page)."""
|
|
|
+
|
|
|
+ @property
|
|
|
+ def is_mix(self):
|
|
|
+ """Report mixed collection mode; kept a property so ``spider.is_mix`` yields the bool itself rather than a bound method."""
|
|
|
+ return True
|
|
|
+
|
|
|
+
|
|
|
class BiddingListSpider(BaseBusinessListSpider):
|
|
|
- """Business class for bidding list-page collection"""
|
|
|
+ """Spider for bidding list-page collection"""
|
|
|
|
|
|
__business_type__ = "BiddingList"
|
|
|
|
|
|
|
|
|
class BiddingDetailSpider(BaseBusinessDetailSpider):
|
|
|
- """Business class for bidding detail-page collection"""
|
|
|
+ """Spider for bidding detail-page collection"""
|
|
|
|
|
|
__business_type__ = "BiddingDetail"
|
|
|
|
|
|
|
|
|
class PlanToBuildListSpider(BaseBusinessListSpider):
|
|
|
- """Business class for planned-construction list-page collection"""
|
|
|
+ """Spider for planned-construction list-page collection"""
|
|
|
|
|
|
__business_type__ = "PlanToBuildList"
|
|
|
|
|
|
|
|
|
class PlanToBuildDetailSpider(BaseBusinessDetailSpider):
|
|
|
- """Business class for planned-construction detail-page collection"""
|
|
|
+ """Spider for planned-construction detail-page collection"""
|
|
|
|
|
|
__business_type__ = "PlanToBuildDetail"
|