build_spider.py 492 B

12345678910111213141516171819202122
  1. from crawler import BreadthCrawler
  2. from crawler.engines import BingSearchEngine
  3. def main():
  4. engines_lst = [BingSearchEngine()]
  5. BreadthCrawler(
  6. init_validator=True,
  7. url_weight=20,
  8. org_weight=5,
  9. keyword_weight=15,
  10. engines=engines_lst,
  11. max_query_page=30,
  12. loop_sync_interval=1200,
  13. loop_query_interval=30,
  14. loop_excavate_interval=10,
  15. excavate_workers=1,
  16. ).start()
  17. if __name__ == '__main__':
  18. main()