dongzhaorui 3 жил өмнө
parent
commit
946ad81029

+ 11 - 17
find_source/crawler/__init__.py

@@ -1,28 +1,22 @@
 import threading
 import threading
-from concurrent.futures import ThreadPoolExecutor, wait
 
 
-from crawler.spiders import SearchEngine, VisitDomain, SyncData
-from crawler.utils import err_details
+from crawler.spiders import SyncData, SearchEngine, VisitDomain
 
 
 
 
class BreadthCrawler:
    """Breadth-first domain-discovery crawler.

    Composes (rather than inherits, as the pre-commit version did) the two
    pipeline stages:
      * a ``SearchEngine`` that produces candidate domains, and
      * a ``VisitDomain`` excavator that visits/validates them.

    ``start()`` launches each stage on its own daemon-less thread and returns
    immediately; it does NOT join them, so the process stays alive only as
    long as those threads (or other non-daemon threads) run.
    """

    def __init__(self, engines=None, visit_workers=1, **kwargs):
        """Set up the pipeline stages.

        :param engines: optional engine selection passed to
            ``SearchEngine.set_engines``; ``None`` presumably means
            "use the defaults" — TODO confirm against SearchEngine.
        :param visit_workers: worker count forwarded to ``VisitDomain``.
        :param kwargs: shared configuration forwarded to ``SyncData``,
            ``SearchEngine`` and ``VisitDomain``.
        """
        # SyncData is invoked once for its side effects (the instance is
        # deliberately discarded). NOTE(review): the scraped diff showed this
        # line twice, but that is a rendering duplicate — the commit adds it once.
        SyncData(**kwargs)
        self._engines = SearchEngine(**kwargs)
        self._engines.set_engines(engines)
        self._excavator = VisitDomain(visit_workers=visit_workers, **kwargs)

    def start(self):
        """Start both pipeline stages on separate threads (non-blocking).

        Replaces the pre-commit ThreadPoolExecutor fan-out: each stage now
        manages its own internal concurrency, so one thread per stage suffices.
        """
        threading.Thread(
            target=self._engines.start,
            name='MainSearchEngine'
        ).start()
        threading.Thread(
            target=self._excavator.start,
            name='MainSearchVisit'
        ).start()