import threading

from crawler.services import (
    SyncData,
    QueryKeyWord,
    QueryOrganization,
    DataExcavate
)


class BreadthCrawler:
    """Wire together the crawler's sync, query and excavation services.

    The two query services are optional: each is created only when its
    engine is supplied, and :meth:`start` launches only the ones that
    were enabled.

    Args:
        query_kw_engine: Engine handed to ``QueryKeyWord``. ``None`` (the
            default) leaves the keyword query service disabled.
        query_org_engine: Engine handed to ``QueryOrganization``. ``None``
            (the default) leaves the organization query service disabled.
        **kwargs: Optional tuning values, looked up by key:

            * ``init_validator`` (default ``True``) and
              ``loop_sync_interval`` (default ``1200``) for ``SyncData``;
            * ``query_kw_workers`` (default ``1``),
              ``loop_query_kw_interval`` (default ``60``) and
              ``max_query_page`` (default ``3``) for ``QueryKeyWord``;
            * ``query_org_workers`` (default ``1``) and
              ``loop_query_org_interval`` (default ``60``) for
              ``QueryOrganization``;
            * ``excavate_workers`` (default ``1``) and
              ``loop_excavate_interval`` (default ``20``) for
              ``DataExcavate``.
    """

    def __init__(
            self,
            query_kw_engine=None,
            query_org_engine=None,
            **kwargs
    ):
        self.enable_query_kw = False
        self.enable_query_org = False
        # Define the optional services unconditionally so the attributes
        # always exist, even when the corresponding engine was not given.
        self._query_kw = None
        self._query_org = None

        # Sync service. The instance is intentionally not stored.
        # NOTE(review): presumably SyncData launches itself from its
        # constructor -- confirm against crawler.services.
        SyncData(
            init_validator=kwargs.get('init_validator', True),
            loop_interval=kwargs.get('loop_sync_interval', 1200)
        )

        # Query services (each one only when its engine is provided).
        if query_kw_engine is not None:
            self._query_kw = QueryKeyWord(
                engine=query_kw_engine,
                query_workers=kwargs.get('query_kw_workers', 1),
                loop_query_interval=kwargs.get('loop_query_kw_interval', 60),
                max_query_page=kwargs.get('max_query_page', 3)
            )
            self.enable_query_kw = True
        if query_org_engine is not None:
            self._query_org = QueryOrganization(
                engine=query_org_engine,
                query_workers=kwargs.get('query_org_workers', 1),
                loop_query_interval=kwargs.get('loop_query_org_interval', 60),
            )
            self.enable_query_org = True

        # Data excavation service (always created).
        self._excavator = DataExcavate(
            workers=kwargs.get('excavate_workers', 1),
            loop_interval=kwargs.get('loop_excavate_interval', 20)
        )

    @staticmethod
    def _spawn(target, name):
        """Run ``target`` on a new, immediately started thread called ``name``."""
        threading.Thread(target=target, name=name).start()

    def start(self):
        """Start every enabled service, each on its own named thread.

        The keyword and organization query services are started only when
        their ``enable_*`` flag is set; the excavator is always started.
        The method returns right after the threads are launched and does
        not wait for them.
        """
        if self.enable_query_kw:
            self._spawn(self._query_kw.start, 'MainQueryKeyWord')
        if self.enable_query_org:
            self._spawn(self._query_org.start, 'MainQueryOrganization')
        self._spawn(self._excavator.start, 'MainDataExcavate')