from concurrent.futures import ThreadPoolExecutor, wait

from crawler.spiders import SearchEngine, SearchDomain
from crawler.utils import err_details


class BreadthCrawler(SearchEngine, SearchDomain):
    """Breadth-first crawler that pairs a search-engine producer with crawl workers."""

    def __init__(self, workers=1, **kwargs):
        SearchEngine.__init__(self, **kwargs)
        SearchDomain.__init__(self, **kwargs)
        self._workers = workers

    def start(self):
        # Reserve one extra thread for the search-engine producer so it does
        # not occupy a slot needed by the crawl workers. With
        # max_workers=self._workers, the last submitted crawl_spider task
        # could not start while search_engines was still running.
        with ThreadPoolExecutor(max_workers=self._workers + 1) as executor:
            futures = []

            # Producer: query the search engines for seed URLs.
            f = executor.submit(self.search_engines)
            f.add_done_callback(err_details)
            futures.append(f)

            # Consumers: one crawl worker per configured thread.
            for _ in range(self._workers):
                future = executor.submit(self.crawl_spider)
                future.add_done_callback(err_details)
                futures.append(future)

            wait(futures)
            print('Source-finding task finished')
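

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of driving BreadthCrawler. The constructor forwards
# **kwargs to SearchEngine and SearchDomain, whose accepted parameters are
# not shown in this module, so only `workers` is passed explicitly here.
if __name__ == '__main__':
    crawler = BreadthCrawler(workers=4)
    crawler.start()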