from concurrent.futures import ThreadPoolExecutor, wait

from crawler.spiders import SearchEngine, VisitDomain
from crawler.utils import err_details


class BreadthCrawler(SearchEngine, VisitDomain):

    def __init__(self, workers=2, **kwargs):
        SearchEngine.__init__(self, **kwargs)
        VisitDomain.__init__(self, **kwargs)
        # Enforce a minimum of two workers: one slot for the search-engine
        # producer and at least one for the domain-visiting consumers.
        # (The original `workers if workers < 2 else 2` capped the pool at 2,
        # silently ignoring larger values.)
        self._workers = max(workers, 2)

    def start(self):
        with ThreadPoolExecutor(max_workers=self._workers) as executor:
            futures = []
            # One worker queries the search engines for new domains.
            f_engine = executor.submit(self.search_engines)
            f_engine.add_done_callback(err_details)
            futures.append(f_engine)
            # The remaining workers visit the discovered domains. Spawning
            # self._workers - 1 of them keeps the total task count equal to
            # max_workers, so no task sits queued behind a full pool.
            for _ in range(self._workers - 1):
                f_domain = executor.submit(self.search_domains)
                f_domain.add_done_callback(err_details)
                futures.append(f_domain)
            wait(futures)
        print('Source-finding task finished')
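

# Minimal usage sketch. The keyword arguments accepted by SearchEngine and
# VisitDomain are not defined in this module, so `keywords` and `timeout`
# below are hypothetical, shown only to illustrate how **kwargs is forwarded
# to both base classes.
if __name__ == '__main__':
    crawler = BreadthCrawler(
        workers=4,              # 1 engine worker + 3 domain workers
        keywords=['example'],   # assumed kwarg, consumed by SearchEngine
        timeout=10,             # assumed kwarg, consumed by VisitDomain
    )
    crawler.start()  # blocks until all submitted futures complete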