```python
from concurrent.futures import ThreadPoolExecutor, wait

from crawler.spiders import SearchEngine, VisitDomain
from crawler.utils import err_details


class BreadthCrawler(SearchEngine, VisitDomain):
    def __init__(self, workers=2, **kwargs):
        SearchEngine.__init__(self, **kwargs)
        VisitDomain.__init__(self, **kwargs)
        self._workers = workers

    def start(self):
        with ThreadPoolExecutor(max_workers=self._workers) as executor:
            futures = []
            # One task drives the search engines and feeds candidate domains.
            f_engine = executor.submit(self.search_engines)
            f_engine.add_done_callback(err_details)  # surface any exception from the task
            futures.append(f_engine)
            # The remaining tasks visit the discovered domains concurrently.
            for _ in range(self._workers):
                f_domain = executor.submit(self.search_domains)
                f_domain.add_done_callback(err_details)
                futures.append(f_domain)
            # Block until the engine task and all domain tasks have completed.
            wait(futures)
        print('Source discovery task finished')
```
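A minimal usage sketch follows, assuming `BreadthCrawler` is importable from this module and that whatever keyword arguments `SearchEngine` and `VisitDomain` require are passed through `**kwargs`; the `seed_keywords` argument below is hypothetical and only stands in for those base-class options.

```python
# Hypothetical usage: the keyword arguments accepted by SearchEngine /
# VisitDomain are not shown in this snippet, so 'seed_keywords' is a
# placeholder for whatever those base classes actually expect.
crawler = BreadthCrawler(workers=4, seed_keywords=['example'])
crawler.start()  # blocks until the engine task and all domain workers finish
```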