123456789101112131415161718192021222324 |
- from crawler import BreadthCrawler
- from crawler.engines import BingSearchEngine, QccSearchEngine
def main():
    """Build a BreadthCrawler with this script's fixed settings and start it.

    Takes no arguments and returns nothing; the call to ``start()`` is the
    only effect of this function.
    """
    # Instantiate the search engines up front, in the same order they are
    # handed to the crawler below.
    kw_engine = BingSearchEngine()
    org_engine = QccSearchEngine()

    # NOTE(review): the weight and interval values are passed through
    # unchanged; their units (presumably seconds for the intervals) are
    # defined by BreadthCrawler - confirm against its implementation.
    crawler = BreadthCrawler(
        init_validator=True,
        init_collector=True,
        url_weight=20,
        org_weight=5,
        keyword_weight=15,
        loop_sync_interval=1200,
        query_kw_engine=kw_engine,
        loop_query_kw_interval=10,
        max_query_page=30,
        query_org_engine=org_engine,
        loop_query_org_interval=300,
        loop_excavate_interval=10,
        excavate_workers=1,
    )
    crawler.start()
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|