瀏覽代碼

删除 request.is_mix 属性

dongzhaorui 1 年之前
父節點
當前提交
beb4c4bd8a

+ 0 - 2
FworkSpider/feapder/__init__.py

@@ -16,7 +16,6 @@ sys.path.insert(0, re.sub(r"([\\/]items$)|([\\/]spiders$)", "", os.getcwd()))
 __all__ = [
     "AirSpider",
     "Spider",
-    "MixBusinessSpider",
     "BiddingListSpider",
     "BiddingDetailSpider",
     "PlanToBuildListSpider",
@@ -34,7 +33,6 @@ __all__ = [
 from feapder.core.spiders import (
     Spider,
     AirSpider,
-    MixBusinessSpider,
     BiddingListSpider,
     BiddingDetailSpider,
     PlanToBuildListSpider,

+ 7 - 1
FworkSpider/feapder/core/parser_control.py

@@ -203,7 +203,13 @@ class PaserControl(threading.Thread):
                         elif isinstance(result, Item):
                             result_type = 2
 
-                            result.is_mixed = parser.is_mix  # 添加属性 - 混合采集
+                            # 爬虫采集方式
+                            #   True=混合采集(列表页+详情页)
+                            #   False=独立采集(列表页,详情页)
+                            result.is_mixed = False
+                            if "List" in parser.__business_type__ and hasattr(result, 'contenthtml'):
+                                result.is_mixed = True
+
                             counter['extractQuantity'] += 1  # 统计抽取列表数
                             if not self.is_duplicate(result):
                                 counter['realQuantity'] += 1  # 统计实际列表数

+ 0 - 2
FworkSpider/feapder/core/spiders/__init__.py

@@ -13,7 +13,6 @@ __all__ = [
     "Spider",
     "BiddingListSpider",
     "BiddingDetailSpider",
-    "MixBusinessSpider",
     "PlanToBuildListSpider",
     "PlanToBuildDetailSpider",
 ]
@@ -23,7 +22,6 @@ from feapder.core.spiders.spider import (
     Spider,
     BiddingListSpider,
     BiddingDetailSpider,
-    MixBusinessSpider,
     PlanToBuildListSpider,
     PlanToBuildDetailSpider
 )

+ 0 - 12
FworkSpider/feapder/core/spiders/spider.py

@@ -100,11 +100,6 @@ class Spider(
         DebugSpider.__name__ = cls.__name__
         return DebugSpider(*args, **kwargs)
 
-    @property
-    def is_mix(self):
-        """爬虫采集方式 True=混合采集(列表页+详情页) False=独立采集(列表页,详情页)"""
-        return False
-
 
 class DebugSpider(Spider):
     """
@@ -249,13 +244,6 @@ class BaseBusinessListSpider(Spider):
             pass
 
 
-class MixBusinessSpider(BaseBusinessListSpider):
-    """混采(列表页+详情页)采集基础爬虫"""
-
-    def is_mix(self):
-        return True
-
-
 class BaseBusinessDetailSpider(Spider):
     """详情页采集基础爬虫"""