build_spider.py 597 B

123456789101112131415161718192021222324
  1. from crawler import BreadthCrawler
  2. from crawler.engines import BingSearchEngine, QccSearchEngine
  def main() -> None:
      """Build a ``BreadthCrawler`` with this script's fixed settings and start it.

      All options are passed by keyword and are hard-coded below. What each
      option means (and its unit) is defined by ``BreadthCrawler`` itself and
      is not visible from this script.
      """
      # NOTE(review): the *_interval values are presumably seconds -- confirm
      # against the BreadthCrawler implementation before tuning them.
      crawler = BreadthCrawler(
          init_validator=True,
          init_collector=True,
          url_weight=20,
          org_weight=5,
          keyword_weight=15,
          loop_sync_interval=1200,
          query_kw_engine=BingSearchEngine(),
          loop_query_kw_interval=10,
          max_query_page=30,
          query_org_engine=QccSearchEngine(),
          loop_query_org_interval=300,
          loop_excavate_interval=10,
          excavate_workers=1,
      )
      crawler.start()
  # Start the crawler only when this file is executed as a script, not when
  # it is imported as a module.
  if __name__ == '__main__':
      main()