build_spider.py 568 B

1234567891011121314151617181920212223
  1. from crawler import BreadthCrawler
  2. from crawler.engines import BingSearchEngine, QccSearchEngine
  3. def main():
  4. BreadthCrawler(
  5. init_validator=True,
  6. url_weight=20,
  7. org_weight=5,
  8. keyword_weight=15,
  9. loop_sync_interval=1200,
  10. query_kw_engine=BingSearchEngine(),
  11. loop_query_kw_interval=10,
  12. max_query_page=30,
  13. query_org_engine=QccSearchEngine(),
  14. loop_query_org_interval=300,
  15. loop_excavate_interval=10,
  16. excavate_workers=1
  17. ).start()
  18. if __name__ == '__main__':
  19. main()