__init__.py

import threading
from concurrent.futures import ThreadPoolExecutor, wait

from crawler.spiders import SearchEngine, VisitDomain, SyncData
from crawler.utils import err_details


class BreadthCrawler(SearchEngine, VisitDomain):

    def __init__(self, workers=1, **kwargs):
        # Synchronize shared crawl state before the spiders are initialized.
        SyncData(**kwargs)
        SearchEngine.__init__(self, **kwargs)
        VisitDomain.__init__(self, **kwargs)
        self._workers = workers

    def start(self):
        # Produce candidate domains from the search engines on a
        # dedicated background thread.
        threading.Thread(
            target=self.load_engines,
            name='MainSearchEngine'
        ).start()
        # Consume queued domains with a pool of worker threads; each
        # finished future is passed to err_details for error reporting.
        with ThreadPoolExecutor(max_workers=self._workers) as executor:
            futures = []
            for _ in range(self._workers):
                f = executor.submit(self.search_domains)
                f.add_done_callback(err_details)
                futures.append(f)
            wait(futures)
        print('Source-finding task finished')
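

# Minimal usage sketch. Assumptions: the keyword arguments consumed by
# SyncData/SearchEngine/VisitDomain are optional here, and this file is
# the package's __init__.py, so BreadthCrawler is already in scope.
# Adjust the kwargs to whatever your spiders actually require.
if __name__ == '__main__':
    crawler = BreadthCrawler(workers=4)  # four concurrent search_domains workers
    crawler.start()                      # blocks until all workers finish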