|
@@ -7,6 +7,7 @@ Created on 2024-08-19
|
|
|
@author: Dzr
|
|
|
"""
|
|
|
|
|
|
+from itertools import product
|
|
|
from threading import Thread
|
|
|
|
|
|
import feapder.setting as setting
|
|
@@ -60,7 +61,7 @@ class Spider(BaseParser, Thread):
|
|
|
|
|
|
request.parser_name = request.parser_name or self.name
|
|
|
self._memory_db.add(request)
|
|
|
- except IOError:
|
|
|
+ except (IOError, AttributeError):
|
|
|
log.error("distribute task failed")
|
|
|
|
|
|
def all_thread_is_done(self):
|
|
@@ -181,6 +182,18 @@ class BaseBusinessListSpider(Spider):
|
|
|
|
|
|
return next(_page_increment(), None)
|
|
|
|
|
|
def product(self):
    """Yield ``(menu, page)`` pairs for every page of every configured menu.

    Each menu is paired only with its own page numbers, ``1`` through
    ``menu.crawl_page`` inclusive. A menu without a ``crawl_page``
    attribute is treated as having 0 pages and contributes no pairs.

    This is a generator, so the ``menus`` check runs when iteration
    starts, not when the method is called.

    Yields:
        tuple: ``(menu, page)`` where ``page`` is a 1-based page number.

    Raises:
        AttributeError: if ``self.menus`` is missing or empty.
    """
    menus = getattr(self, "menus", None)
    if not menus:
        raise AttributeError("请设置 menus")

    # Iterate each menu over its own page range. A cartesian product of
    # ``menus`` with the concatenated page ranges of all menus would pair
    # every menu with the other menus' pages too, producing duplicate and
    # out-of-range (menu, page) combinations.
    for menu in menus:
        page_count = getattr(menu, "crawl_page", 0)
        for page in range(1, page_count + 1):
            yield menu, page
|
|
|
+
|
|
|
|
|
|
class BaseBusinessDetailSpider(Spider):
|
|
|
"""详情页采集基础爬虫"""
|