dongzhaorui 3 years ago
parent commit f498522fd8
1 changed file with 13 additions and 10 deletions

+ 13 - 10
find_source/crawler/__init__.py

@@ -1,25 +1,28 @@
+import threading
 from concurrent.futures import ThreadPoolExecutor, wait
 
-from crawler.spiders import SearchEngine, VisitDomain
+from crawler.spiders import SearchEngine, VisitDomain, SyncData
 from crawler.utils import err_details
 
 
 class BreadthCrawler(SearchEngine, VisitDomain):
 
-    def __init__(self, workers=2, **kwargs):
+    def __init__(self, workers=1, **kwargs):
+        SyncData(**kwargs)
         SearchEngine.__init__(self, **kwargs)
         VisitDomain.__init__(self, **kwargs)
-        self._workers = workers if workers < 2 else 2
+        self._workers = workers
 
     def start(self):
+        threading.Thread(
+            target=self.load_engines,
+            name='MainSearchEngine'
+        ).start()
         with ThreadPoolExecutor(max_workers=self._workers) as executor:
             futures = []
-            f_engine = executor.submit(self.search_engines)
-            f_engine.add_done_callback(err_details)
-            futures.append(f_engine)
             for _ in range(1, self._workers + 1):
-                f_domain = executor.submit(self.search_domains)
-                f_domain.add_done_callback(err_details)
-                futures.append(f_domain)
+                f = executor.submit(self.search_domains)
+                f.add_done_callback(err_details)
+                futures.append(f)
             wait(futures)
-            print('寻源任务结束')
+        print('寻源任务结束')
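The commit's net effect: SyncData(**kwargs) now runs before the parent constructors, the engine loader moves off the executor onto a dedicated MainSearchEngine thread so every pool worker is free to run search_domains, the hard cap of two workers is dropped in favor of a default of 1, and the closing print('寻源任务结束') ("source-finding task finished") moves outside the with block so it fires only after the executor has fully shut down. Below is a minimal, self-contained sketch of that producer-on-a-thread / consumers-in-a-pool pattern. The queue, the stub task bodies, and the sentinel-based shutdown are assumptions added for illustration; only the names load_engines, search_domains, and err_details come from the diff, and err_details is approximated here as an exception-reporting done-callback.

import queue
import threading
from concurrent.futures import ThreadPoolExecutor, wait

tasks = queue.Queue()
STOP = object()  # sentinel telling a worker to exit

def err_details(future):
    # Approximation of crawler.utils.err_details: a done-callback that
    # surfaces any exception a submitted task raised, so it is not lost.
    exc = future.exception()
    if exc is not None:
        print(f'task failed: {exc!r}')

def load_engines(workers):
    # Stand-in for SearchEngine.load_engines: produce work items, then
    # enqueue one sentinel per worker so every consumer can shut down.
    for domain in ('example.com', 'example.org'):
        tasks.put(domain)
    for _ in range(workers):
        tasks.put(STOP)

def search_domains():
    # Stand-in for VisitDomain.search_domains: drain the shared queue.
    while True:
        item = tasks.get()
        if item is STOP:
            break
        print(f'visiting {item}')

workers = 2
# The producer runs on its own named thread, as in the patched start():
threading.Thread(
    target=load_engines,
    args=(workers,),
    name='MainSearchEngine'
).start()
with ThreadPoolExecutor(max_workers=workers) as executor:
    futures = []
    for _ in range(workers):
        f = executor.submit(search_domains)
        f.add_done_callback(err_details)
        futures.append(f)
    wait(futures)
print('source-finding task finished')  # runs only after executor shutdown

Moving the producer out of the pool also frees every executor slot for search_domains: under the old code, the long-running search_engines task was itself submitted to the pool and occupied one of the max_workers slots, so one of the submitted domain tasks could never start.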