"""Breadth crawler bootstrap: wires the sync, query, and excavation services."""
import threading

from crawler.services import (
    SyncData,
    QueryKeyWord,
    QueryOrganization,
    DataExcavate
)
  8. class BreadthCrawler:
  9. def __init__(
  10. self,
  11. query_kw_engine=None,
  12. query_org_engine=None,
  13. **kwargs
  14. ):
  15. self.enable_query_kw = False
  16. self.enable_query_org = False
  17. '''同步服务'''
  18. SyncData(
  19. init_validator=kwargs.get('init_validator', True),
  20. loop_interval=kwargs.get('loop_sync_interval', 1200)
  21. )
  22. '''查询服务'''
  23. if query_kw_engine is not None:
  24. self._query_kw = QueryKeyWord(
  25. engine=query_kw_engine,
  26. query_workers=kwargs.get('query_kw_workers', 1),
  27. loop_query_interval=kwargs.get('loop_query_kw_interval', 60),
  28. max_query_page=kwargs.get('max_query_page', 3)
  29. )
  30. self.enable_query_kw = True
  31. if query_org_engine is not None:
  32. self._query_org = QueryOrganization(
  33. engine=query_org_engine,
  34. query_workers=kwargs.get('query_org_workers', 1),
  35. loop_query_interval=kwargs.get('loop_query_org_interval', 60),
  36. )
  37. self.enable_query_org = True
  38. '''数据挖掘服务'''
  39. self._excavator = DataExcavate(
  40. workers=kwargs.get('excavate_workers', 1),
  41. loop_interval=kwargs.get('loop_excavate_interval', 20)
  42. )
  43. def start(self):
  44. if self.enable_query_kw:
  45. threading.Thread(
  46. target=self._query_kw.start,
  47. name='MainQueryKeyWord'
  48. ).start()
  49. if self.enable_query_org:
  50. threading.Thread(
  51. target=self._query_org.start,
  52. name='MainQueryOrganization'
  53. ).start()
  54. threading.Thread(
  55. target=self._excavator.start,
  56. name='MainDataExcavate'
  57. ).start()