__init__.py 914 B

12345678910111213141516171819202122232425
  1. from concurrent.futures import ThreadPoolExecutor, wait
  2. from crawler.spiders import SearchEngine, VisitDomain
  3. from crawler.utils import err_details
  4. class BreadthCrawler(SearchEngine, VisitDomain):
  5. def __init__(self, workers=2, **kwargs):
  6. SearchEngine.__init__(self, **kwargs)
  7. VisitDomain.__init__(self, **kwargs)
  8. self._workers = workers
  9. def start(self):
  10. with ThreadPoolExecutor(max_workers=self._workers) as executor:
  11. futures = []
  12. f_engine = executor.submit(self.search_engines)
  13. f_engine.add_done_callback(err_details)
  14. futures.append(f_engine)
  15. for _ in range(1, self._workers + 1):
  16. f_domain = executor.submit(self.search_domains)
  17. f_domain.add_done_callback(err_details)
  18. futures.append(f_domain)
  19. wait(futures)
  20. print('寻源任务结束')