import threading

from crawler.services import (
    SyncData,
    QueryKeyWord,
    QueryOrganization,
    DataExcavate
)


class BreadthCrawler:
    """Wire together the crawler services and run each one in its own thread.

    Construction instantiates ``SyncData`` and ``DataExcavate``
    unconditionally, and ``QueryKeyWord`` / ``QueryOrganization`` only when
    the corresponding engine is supplied. ``start`` then launches one thread
    per enabled query service plus one for the excavator.

    Attributes:
        enable_query_kw: True when a keyword-query engine was supplied.
        enable_query_org: True when an organization-query engine was supplied.
    """

    def __init__(
            self,
            query_kw_engine=None,
            query_org_engine=None,
            **kwargs
    ):
        """Build the service objects from the given engines and options.

        Args:
            query_kw_engine: Engine handed to ``QueryKeyWord``; when ``None``
                the keyword-query service is not created.
            query_org_engine: Engine handed to ``QueryOrganization``; when
                ``None`` the organization-query service is not created.
            **kwargs: Optional tuning values, read with these defaults:
                ``init_validator`` (True), ``loop_sync_interval`` (1200),
                ``query_kw_workers`` (1), ``loop_query_kw_interval`` (60),
                ``max_query_page`` (3), ``query_org_workers`` (1),
                ``loop_query_org_interval`` (60), ``excavate_workers`` (1),
                ``loop_excavate_interval`` (20).
                NOTE(review): intervals are presumably seconds -- confirm
                against the service implementations.
        """
        self.enable_query_kw = False
        self.enable_query_org = False
        # Always define the service attributes so the instance has the same
        # shape whether or not an engine was supplied.
        self._query_kw = None
        self._query_org = None

        # Sync service. The instance is deliberately not retained, matching
        # the original code.
        # NOTE(review): presumably SyncData kicks off its own work from its
        # constructor -- confirm in crawler.services.
        SyncData(
            init_validator=kwargs.get('init_validator', True),
            loop_interval=kwargs.get('loop_sync_interval', 1200)
        )

        # Query services (each one is optional).
        if query_kw_engine is not None:
            self._query_kw = QueryKeyWord(
                engine=query_kw_engine,
                query_workers=kwargs.get('query_kw_workers', 1),
                loop_query_interval=kwargs.get('loop_query_kw_interval', 60),
                max_query_page=kwargs.get('max_query_page', 3)
            )
            self.enable_query_kw = True

        if query_org_engine is not None:
            self._query_org = QueryOrganization(
                engine=query_org_engine,
                query_workers=kwargs.get('query_org_workers', 1),
                loop_query_interval=kwargs.get('loop_query_org_interval', 60),
            )
            self.enable_query_org = True

        # Data excavation service (always created).
        self._excavator = DataExcavate(
            workers=kwargs.get('excavate_workers', 1),
            loop_interval=kwargs.get('loop_excavate_interval', 20)
        )

    @staticmethod
    def _launch(target, name):
        """Start ``target`` in a new thread called ``name``."""
        # The daemon flag is left at its default, as in the original code.
        threading.Thread(target=target, name=name).start()

    def start(self):
        """Launch every enabled service in its own named thread.

        Threads are started in this order: keyword query (if enabled),
        organization query (if enabled), then the excavator. The method
        returns right after starting them; it does not join the threads.
        """
        if self.enable_query_kw:
            self._launch(self._query_kw.start, 'MainQueryKeyWord')

        if self.enable_query_org:
            self._launch(self._query_org.start, 'MainQueryOrganization')

        self._launch(self._excavator.start, 'MainDataExcavate')