|
@@ -4,19 +4,15 @@ from crawler.engines import BingSearchEngine, QccSearchEngine
|
|
|
|
|
|
def main():
|
|
|
BreadthCrawler(
|
|
|
- init_validator=True,
|
|
|
- init_collector=True,
|
|
|
+ allow_sync_data=True,
|
|
|
+ allow_query=True,
|
|
|
+ keyword_query_engine=BingSearchEngine(),
|
|
|
+ org_query_engine=QccSearchEngine(),
|
|
|
url_weight=20,
|
|
|
org_weight=5,
|
|
|
keyword_weight=15,
|
|
|
- loop_sync_interval=1200,
|
|
|
- query_kw_engine=BingSearchEngine(),
|
|
|
- loop_query_kw_interval=10,
|
|
|
- max_query_page=30,
|
|
|
- query_org_engine=QccSearchEngine(),
|
|
|
- loop_query_org_interval=300,
|
|
|
- loop_excavate_interval=10,
|
|
|
- excavate_workers=1
|
|
|
+ excavate_depth=3,
|
|
|
+ excavate_workers=5,
|
|
|
).start()
|
|
|
|
|
|
|