@@ -1,28 +1,22 @@
 import threading
-from concurrent.futures import ThreadPoolExecutor, wait
 
-from crawler.spiders import SearchEngine, VisitDomain, SyncData
-from crawler.utils import err_details
+from crawler.spiders import SyncData, SearchEngine, VisitDomain
 
 
-class BreadthCrawler(SearchEngine, VisitDomain):
+class BreadthCrawler:
 
-    def __init__(self, workers=1, **kwargs):
+    def __init__(self, engines=None, visit_workers=1, **kwargs):
         SyncData(**kwargs)
-        SearchEngine.__init__(self, **kwargs)
-        VisitDomain.__init__(self, **kwargs)
-        self._workers = workers
+        self._engines = SearchEngine(**kwargs)
+        self._engines.set_engines(engines)
+        self._excavator = VisitDomain(visit_workers=visit_workers, **kwargs)
 
     def start(self):
         threading.Thread(
-            target=self.load_engines,
+            target=self._engines.start,
             name='MainSearchEngine'
         ).start()
-        with ThreadPoolExecutor(max_workers=self._workers) as executor:
-            futures = []
-            for _ in range(1, self._workers + 1):
-                f = executor.submit(self.search_domains)
-                f.add_done_callback(err_details)
-                futures.append(f)
-            wait(futures)
-            print('Domain-sourcing task finished')
+        threading.Thread(
+            target=self._excavator.start,
+            name='MainSearchVisit'
+        ).start()
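
For context, a minimal usage sketch of the refactored, composition-based `BreadthCrawler`. The import path, engine names, and argument values below are illustrative assumptions; only `engines=`, `visit_workers=`, and `start()` come from the diff itself.

```python
# Hypothetical usage sketch of the refactored BreadthCrawler (composition over inheritance).
# The import path and engine identifiers are assumptions for illustration only.
from crawler import BreadthCrawler  # module path assumed

crawler = BreadthCrawler(
    engines=['bing', 'baidu'],  # passed to SearchEngine.set_engines(); names assumed
    visit_workers=4,            # forwarded to VisitDomain(visit_workers=...)
)
crawler.start()  # spawns the MainSearchEngine and MainSearchVisit threads, then returns
```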