소스 검색

添加自动翻页

dongzhaorui 1 년 전
부모
커밋
9bc1fae670
2개의 변경된 파일, 16개의 줄 추가 그리고 1개의 줄 삭제
  1. 15 0
      FworkSpider/feapder/core/spiders/spider.py
  2. 1 1
      FworkSpider/feapder/network/request.py

+ 15 - 0
FworkSpider/feapder/core/spiders/spider.py

@@ -230,6 +230,21 @@ class BaseBusinessListSpider(Spider):
 
     __business_type__ = "List"
 
+    def __auto_increment_page_number(self, request):
+        """Pagination: bump request.page and yield the request while below crawl_page."""
+        if request.page < int(request.item["crawl_page"]):
+            request.page += 1  # advance to the next page to crawl
+            yield request
+
+    def infinite_pages(self, request, response):
+        """Return the request advanced to the next page, or None when no page remains."""
+        request_generator = self.__auto_increment_page_number(request)
+        try:
+            request = next(request_generator)
+            return request
+        except StopIteration:  # the generator yielded nothing: page limit reached
+            pass  # fall through, implicitly returning None
+
 
 class MixBusinessSpider(BaseBusinessListSpider):
     """混采(列表页+详情页)采集基础爬虫"""

+ 1 - 1
FworkSpider/feapder/network/request.py

@@ -148,7 +148,7 @@ class Request(object):
         self.is_abandoned = is_abandoned
         self.render = render
         self.render_time = render_time or setting.WEBDRIVER.get("render_time", 0)
-
+        self.page = 1
         self.splash = splash
         self.iframes = iframes