@@ -18,15 +18,17 @@ class BreadthCrawler:
     def __init__(
             self,
             enable_sync_data: bool,
+            enable_search: bool,
             enable_query: bool,
             enable_excavate: bool,
             **kwargs,
     ):
+        self.enable_sync_data = enable_sync_data
+        self.enable_search = enable_search
         self.enable_query = enable_query
         self.enable_excavate = enable_excavate
-        self.enable_sync_data = enable_sync_data
         kwargs.update(self._weight_items)
-        self.app = {}
+        self._app = {}
         self._init(**kwargs)
 
     def _init(self, **kwargs):
@@ -41,24 +43,26 @@ class BreadthCrawler:
             orgs_interval=5 * 3600,
             **kwargs
         )
-        self.app['MainSyncData'] = _sync_data
+        self._app['SyncData'] = _sync_data
 
-        if self.enable_query:
-            _query_keyword = DataQuery(
+        if self.enable_search:
+            _bing_search = DataQuery(
                 engine=BingSearchEngine(),
                 query_workers=kwargs.pop('query_workers', None),
                 max_pages=30,
-                query_interval=300,
+                query_interval=3600,
                 **kwargs
             )
-            _query_organization = DataQuery(
+            self._app['BingSearch'] = _bing_search
+
+        if self.enable_query:
+            _qcc_query = DataQuery(
                 engine=QccSearchEngine(),
                 query_workers=kwargs.pop('query_workers', None),
                 query_interval=1800,
                 **kwargs
             )
-            self.app['MainQueryKeyWord'] = _query_keyword
-            self.app['MainQueryOrganization'] = _query_organization
+            self._app['QccQuery'] = _qcc_query
 
         if self.enable_excavate:
             _excavator = DataExcavate(
@@ -67,8 +71,8 @@ class BreadthCrawler:
                 excavate_interval=10,
                 **kwargs
             )
-            self.app['MainDataExcavate'] = _excavator
+            self._app['DataExcavate'] = _excavator
 
     def start(self):
-        for name, app in self.app.items():
+        for name, app in self._app.items():
             threading.Thread(target=app.start, name=name).start()