build_spider.py 460 B

1234567891011121314151617181920
  1. from crawler import BreadthCrawler
  2. from crawler.engines import BingSearchEngine, QccSearchEngine
  def main() -> None:
      """Build a ``BreadthCrawler`` with this script's fixed settings and start it.

      The crawler is constructed with data sync and querying enabled, a
      ``BingSearchEngine`` instance as the keyword query engine and a
      ``QccSearchEngine`` instance as the organisation query engine, then
      ``start()`` is called on it. Whatever ``start()`` returns is discarded.
      """
      # NOTE(review): the meaning of the weight/depth/worker values is defined
      # by BreadthCrawler, which is not visible here -- confirm against the
      # crawler package before tuning them.
      BreadthCrawler(
          allow_sync_data=True,
          allow_query=True,
          keyword_query_engine=BingSearchEngine(),
          org_query_engine=QccSearchEngine(),
          url_weight=20,
          org_weight=5,
          keyword_weight=15,
          excavate_depth=3,
          excavate_workers=5,
      ).start()
  # Run the crawler only when this file is executed as a script, not on import.
  if __name__ == '__main__':
      main()