# build_spider.py (462 B)

  1. from crawler import BreadthCrawler
  2. from crawler.search import BingSearchEngine
  3. def main():
  4. engines_lst = [BingSearchEngine()]
  5. BreadthCrawler(
  6. allow_load_filter=True,
  7. engines=engines_lst,
  8. url_weight=20,
  9. org_weight=5,
  10. keyword_weight=15,
  11. max_search_page=30,
  12. excavate_workers=1,
  13. loop_search_interval=30,
  14. loop_excavate_interval=10
  15. ).start()
  16. if __name__ == '__main__':
  17. main()