dongzhaorui 3 years ago
parent
commit
1e6bfcdb65
2 changed files with 52 additions and 11 deletions
  1. 51 10
      find_source/crawler/__init__.py
  2. 1 1
      find_source/crawler/services/__init__.py

+ 51 - 10
find_source/crawler/__init__.py

@@ -1,21 +1,62 @@
 import threading
 
-from crawler.services import SyncData, DataQuery, DataExcavate
+from crawler.services import (
+    SyncData,
+    QueryKeyWord,
+    QueryOrganization,
+    DataExcavate
+)
 
 
 class BreadthCrawler:
 
-    def __init__(self, engines=None, **kwargs):
-        SyncData(**kwargs)
-        self._query = DataQuery(engines, **kwargs)
-        self._excavator = DataExcavate(**kwargs)
+    def __init__(
+            self,
+            query_kw_engine=None,
+            query_org_engine=None,
+            **kwargs
+    ):
+        self.enable_query_kw = False
+        self.enable_query_org = False
+        '''同步服务'''
+        SyncData(
+            init_validator=kwargs.get('init_validator', True),
+            loop_interval=kwargs.get('loop_sync_interval', 1200)
+        )
+        '''查询服务'''
+        if query_kw_engine is not None:
+            self._query_kw = QueryKeyWord(
+                engine=query_kw_engine,
+                query_workers=kwargs.get('query_kw_workers', 1),
+                loop_query_interval=kwargs.get('loop_query_kw_interval', 60),
+                max_query_page=kwargs.get('max_query_page', 3)
+            )
+            self.enable_query_kw = True
+        if query_org_engine is not None:
+            self._query_org = QueryOrganization(
+                engine=query_org_engine,
+                query_workers=kwargs.get('query_org_workers', 1),
+                loop_query_interval=kwargs.get('loop_query_org_interval', 60),
+            )
+            self.enable_query_org = True
+        '''数据挖掘服务'''
+        self._excavator = DataExcavate(
+            workers=kwargs.get('excavate_workers', 1),
+            loop_interval=kwargs.get('loop_excavate_interval', 20)
+        )
 
     def start(self):
-        threading.Thread(
-            target=self._query.start,
-            name='MainDataQuery'
-        ).start()
+        if self.enable_query_kw:
+            threading.Thread(
+                target=self._query_kw.start,
+                name='MainQueryKeyWord'
+            ).start()
+        if self.enable_query_org:
+            threading.Thread(
+                target=self._query_org.start,
+                name='MainQueryOrganization'
+            ).start()
         threading.Thread(
             target=self._excavator.start,
-            name='MainSearchVisit'
+            name='MainDataExcavate'
         ).start()

+ 1 - 1
find_source/crawler/services/__init__.py

@@ -1,3 +1,3 @@
 from .data_excavate import DataExcavate
-from .data_query import DataQuery
+from .data_query import QueryKeyWord, QueryOrganization
 from .sync_data import SyncData