1234567891011121314151617181920 |
- from crawler import BreadthCrawler
- from crawler.engines import BingSearchEngine, QccSearchEngine
def main():
    """Configure a ``BreadthCrawler`` with this script's fixed settings and start it.

    A ``BingSearchEngine`` instance is supplied as ``keyword_query_engine``
    and a ``QccSearchEngine`` instance as ``org_query_engine``. All other
    options are literal values passed straight through to the constructor;
    their exact meaning is defined by ``BreadthCrawler`` and is not visible
    from this script.
    """
    # Engines are created in the same order as in the original call.
    keyword_engine = BingSearchEngine()
    org_engine = QccSearchEngine()

    crawler = BreadthCrawler(
        allow_sync_data=True,
        allow_query=True,
        keyword_query_engine=keyword_engine,
        org_query_engine=org_engine,
        url_weight=20,
        org_weight=5,
        keyword_weight=15,
        excavate_depth=3,
        excavate_workers=5,
    )
    crawler.start()
# Run the crawler only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|