Browse Source

更换采集翻页方式

dzr 1 month ago
parent
commit
1b43218f58
2 changed files with 4 additions and 10 deletions
  1. +2 -5
      a_gtcgpt_cggg/Gtcgpt.py
  2. +2 -5
      a_gtcgpt_cgjg/Gtcgpt.py

+ 2 - 5
a_gtcgpt_cggg/Gtcgpt.py

@@ -52,7 +52,8 @@ class Spider(feapder.BiddingListSpider):
             }
             self.headers["Referer"] = tools.joint_url(referer, params)
             url = f"https://cg.95306.cn/proxy/portal/elasticSearch/{menu.tid}"
-            yield feapder.Request(url, proxies=False, item=menu._asdict(), page=1)
+            for page in range(1, menu.crawl_page + 1):
+                yield feapder.Request(url, proxies=False, item=menu._asdict(), page=page)
 
     def download_midware(self, request):
         if self.alteon_pcgmh is None:
@@ -142,10 +143,6 @@ class Spider(feapder.BiddingListSpider):
             list_item.parse_url = "https://cg.95306.cn/proxy/portal/elasticSearch/indexView"
             yield list_item
 
-        # 无限翻页设置
-        request = self.infinite_pages(request, response)
-        yield request
-
     def exception_request(self, request, response):
         self.alteon_pcgmh = None
         self.cookies = None

+ 2 - 5
a_gtcgpt_cgjg/Gtcgpt.py

@@ -54,7 +54,8 @@ class Spider(feapder.BiddingListSpider):
             }
             self.headers["Referer"] = tools.joint_url(referer, params)
             url = f"https://cg.95306.cn/proxy/portal/elasticSearch/{menu.tid}"
-            yield feapder.Request(url, proxies=False, item=menu._asdict(), page=1)
+            for page in range(1, menu.crawl_page + 1):
+                yield feapder.Request(url, proxies=False, item=menu._asdict(), page=page)
 
     def download_midware(self, request):
         if self.alteon_pcgmh is None:
@@ -144,10 +145,6 @@ class Spider(feapder.BiddingListSpider):
             list_item.parse_url = "https://cg.95306.cn/proxy/portal/elasticSearch/indexView"
             yield list_item
 
-        # 无限翻页设置
-        request = self.infinite_pages(request, response)
-        yield request
-
     def exception_request(self, request, response):
         self.alteon_pcgmh = None
         self.cookies = None