|
@@ -86,7 +86,6 @@ class Spider(
|
|
|
break
|
|
|
else:
|
|
|
self._is_notify_end = False
|
|
|
- self.report_node_heartbeat('running')
|
|
|
|
|
|
self.check_task_status()
|
|
|
except (Exception, BaseException) as e:
|
|
@@ -227,50 +226,6 @@ class BaseBusinessListSpider(Spider):
|
|
|
__business_type__ = "List"
|
|
|
__extract_count__ = 0
|
|
|
|
|
|
- def _increment_page_number(self, request):
|
|
|
- """无限翻页 - 页码自增"""
|
|
|
- if self.platform_next_page:
|
|
|
- if getattr(request, 'real_page', None) is None:
|
|
|
- request.real_page = 0 # real_page=连续翻页页码(真实入库数量=0)
|
|
|
-
|
|
|
- request.real_page += 1
|
|
|
-
|
|
|
- if request.rel_count > 0:
|
|
|
- request.real_page = 0 # 当真实入库数量大于0,重置翻页记录
|
|
|
- request.rel_count = 0 # 重置实际入库数量
|
|
|
-
|
|
|
- if request.real_page <= 5 and request.page < self.platform_max_page:
|
|
|
- request.page += 1
|
|
|
- # 设置无限翻页回调方法,进行列表页解析处理
|
|
|
- callback_parser = (
|
|
|
- request.callback
|
|
|
- if callable(request.callback)
|
|
|
- else self.parse
|
|
|
- )
|
|
|
- request.callback = callback_parser
|
|
|
- yield request
|
|
|
- else:
|
|
|
- if request.page < int(request.item["crawl_page"]):
|
|
|
- request.page += 1 # 采集页码自增
|
|
|
- request.rel_count = 0 # 重置实际入库数量
|
|
|
- # 设置无限翻页回调方法,进行列表页解析处理
|
|
|
- callback_parser = (
|
|
|
- request.callback
|
|
|
- if callable(request.callback)
|
|
|
- else self.parse
|
|
|
- )
|
|
|
- request.callback = callback_parser
|
|
|
- yield request
|
|
|
-
|
|
|
- def infinite_pages(self, request, response):
|
|
|
- """无限翻页"""
|
|
|
- request_generator = self._increment_page_number(request)
|
|
|
- try:
|
|
|
- request = next(request_generator)
|
|
|
- return request
|
|
|
- except StopIteration:
|
|
|
- pass
|
|
|
-
|
|
|
@classmethod
|
|
|
def get_extract_count(cls):
|
|
|
return cls.__extract_count__
|