@@ -1,25 +1,25 @@
 from concurrent.futures import ThreadPoolExecutor, wait
 
-from crawler.spiders import SearchEngine, SearchDomain
+from crawler.spiders import SearchEngine, VisitDomain
 from crawler.utils import err_details
 
 
-class BreadthCrawler(SearchEngine, SearchDomain):
+class BreadthCrawler(SearchEngine, VisitDomain):
 
-    def __init__(self, workers=1, **kwargs):
+    def __init__(self, workers=2, **kwargs):
         SearchEngine.__init__(self, **kwargs)
-        SearchDomain.__init__(self, **kwargs)
+        VisitDomain.__init__(self, **kwargs)
         self._workers = workers
 
     def start(self):
         with ThreadPoolExecutor(max_workers=self._workers) as executor:
             futures = []
-            f = executor.submit(self.search_engines)
-            f.add_done_callback(err_details)
-            futures.append(f)
+            f_engine = executor.submit(self.search_engines)
+            f_engine.add_done_callback(err_details)
+            futures.append(f_engine)
             for _ in range(1, self._workers + 1):
-                future = executor.submit(self.crawl_spider)
-                future.add_done_callback(err_details)
-                futures.append(future)
+                f_domain = executor.submit(self.search_domains)
+                f_domain.add_done_callback(err_details)
+                futures.append(f_domain)
             wait(futures)
             print('Sourcing task finished')
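For context, here is a minimal, self-contained sketch of the fan-out pattern the new start() adopts: one producer future (search_engines) plus N consumer futures (search_domains), each wired to a done-callback so exceptions raised inside the pool are surfaced instead of silently stored on the future. The work queue, the producer/consumer bodies, and this err_details body are illustrative assumptions, not the crawler's actual code; only the executor wiring mirrors the diff.

from concurrent.futures import ThreadPoolExecutor, wait
import queue
import traceback

tasks = queue.Queue()  # assumed shared work queue between producer and consumers

def err_details(future):
    # Done-callback: an exception raised inside a submitted task is stored on
    # the future; print its traceback so failures are not dropped silently.
    exc = future.exception()
    if exc is not None:
        traceback.print_exception(type(exc), exc, exc.__traceback__)

def search_engines():
    # Assumed producer: push work items onto the shared queue.
    for url in ('https://example.com/a', 'https://example.com/b'):
        tasks.put(url)

def search_domains():
    # Assumed consumer: drain the queue, exiting once it stays empty briefly.
    while True:
        try:
            url = tasks.get(timeout=1)
        except queue.Empty:
            return
        print('visiting', url)

workers = 2
with ThreadPoolExecutor(max_workers=workers) as executor:
    futures = [executor.submit(search_engines)]
    futures.extend(executor.submit(search_domains) for _ in range(workers))
    for f in futures:
        f.add_done_callback(err_details)
    wait(futures)
    print('Sourcing task finished')

One design note: the hunk submits self._workers + 1 tasks into a pool capped at max_workers=self._workers, so the last domain consumer only starts once another future finishes. With the old default of workers=1, the single thread would run the producer to completion before any consumer started; raising the default to 2 guarantees at least one consumer runs alongside the producer, which is presumably why the default changed.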