3
0
dongzhaorui 1 жил өмнө
parent
commit
9bc1fae670

+ 15 - 0
FworkSpider/feapder/core/spiders/spider.py

@@ -230,6 +230,21 @@ class BaseBusinessListSpider(Spider):
 
     __business_type__ = "List"
 
+    def __auto_increment_page_number(self, request):
+        """Pagination helper: advance the request's page counter.
+
+        Generator that yields ``request`` exactly once, after adding one to
+        ``request.page``, when the current page is below
+        ``int(request.item["crawl_page"])``. Otherwise it finishes without
+        yielding anything.
+        """
+        below_limit = request.page < int(request.item["crawl_page"])
+        if not below_limit:
+            return  # page limit reached: the generator ends empty
+        request.page += 1  # move on to the next page to crawl
+        yield request
+
+    def infinite_pages(self, request, response):
+        """Pagination: return the request for the next page, if there is one.
+
+        Delegates to the private page-increment generator and takes its first
+        value. Returns ``request`` with its ``page`` advanced by one, or
+        ``None`` when the generator yields nothing. ``response`` is accepted
+        but not used in this method.
+        """
+        next_page_requests = self.__auto_increment_page_number(request)
+        # An exhausted generator maps to None, matching the implicit return
+        # of a swallowed StopIteration.
+        return next(next_page_requests, None)
+
 
 class MixBusinessSpider(BaseBusinessListSpider):
     """混采(列表页+详情页)采集基础爬虫"""

+ 1 - 1
FworkSpider/feapder/network/request.py

@@ -148,7 +148,7 @@ class Request(object):
         self.is_abandoned = is_abandoned
         self.render = render
         self.render_time = render_time or setting.WEBDRIVER.get("render_time", 0)
-
+        self.page = 1
         self.splash = splash
         self.iframes = iframes