Explorar el Código

添加支持多线程自动翻页方法

dongzhaorui hace 3 semanas
padre
commit
b8c1ced399
Se ha modificado 1 fichero con 14 adiciones y 1 borrado
  1. 14 1
      FworkSpider/feapder/core/spiders/spider.py

+ 14 - 1
FworkSpider/feapder/core/spiders/spider.py

@@ -7,6 +7,7 @@ Created on 2024-08-19
 @author: Dzr
 """
 
+from itertools import product
 from threading import Thread
 
 import feapder.setting as setting
@@ -60,7 +61,7 @@ class Spider(BaseParser, Thread):
 
                 request.parser_name = request.parser_name or self.name
                 self._memory_db.add(request)
-        except IOError:
+        except (IOError, AttributeError):
             log.error("distribute task failed")
 
     def all_thread_is_done(self):
@@ -181,6 +182,18 @@ class BaseBusinessListSpider(Spider):
 
         return next(_page_increment(), None)
 
+    def product(self):
+        menus = getattr(self, "menus", None)
+        if not menus:
+            raise AttributeError("请设置 menus")
+
+        def _get_page(menu):
+            return getattr(menu, "crawl_page", 0)
+
+        iterable = product(menus, (p for menu in menus for p in range(1, _get_page(menu) + 1)))
+        for menu, page in iterable:
+            yield menu, page
+
 
 class BaseBusinessDetailSpider(Spider):
     """详情页采集基础爬虫"""