|
@@ -30,6 +30,7 @@ class CrawlListPageSpider:
|
|
self.host = 'https://www.zbytb.com/search'
|
|
self.host = 'https://www.zbytb.com/search'
|
|
self.headers = kwargs.get('headers') or headers
|
|
self.headers = kwargs.get('headers') or headers
|
|
self.proxy = Proxy(enable_proxy)
|
|
self.proxy = Proxy(enable_proxy)
|
|
|
|
+ self.max_page = kwargs.get('max_page', 10)
|
|
self.allow_show_exception = kwargs.get('allow_show_exception', False)
|
|
self.allow_show_exception = kwargs.get('allow_show_exception', False)
|
|
|
|
|
|
def crawl_request(self, url, **kwargs):
|
|
def crawl_request(self, url, **kwargs):
|
|
@@ -142,7 +143,7 @@ class CrawlListPageSpider:
|
|
# 遍历省份
|
|
# 遍历省份
|
|
for area_id in range(1, 32):
|
|
for area_id in range(1, 32):
|
|
# 遍历页码
|
|
# 遍历页码
|
|
- for page in range(1, 10):
|
|
|
|
|
|
+ for page in range(1, self.max_page + 1):
|
|
tasks.append((module_id, area_id, page))
|
|
tasks.append((module_id, area_id, page))
|
|
yield from tasks
|
|
yield from tasks
|
|
|
|
|