|
@@ -5,19 +5,19 @@ from common.databases import mongo_table, redis_client
|
|
|
MGO_DATABASE = 'shujuziyuan'
|
|
|
'''去重表'''
|
|
|
MGO_REMOVAL_DUPLICATE = mongo_table(db=MGO_DATABASE, name='removal_duplicate')
|
|
|
-'''新源表'''
|
|
|
+'''数据挖掘结果表'''
|
|
|
MGO_DOMAIN = mongo_table(db=MGO_DATABASE, name='new_domains')
|
|
|
-'''单位组织站点网址表'''
|
|
|
+'''查询结果表'''
|
|
|
MGO_QUERY = mongo_table(db=MGO_DATABASE, name='data_query')
|
|
|
'''数据采集记录表'''
|
|
|
MGO_RECORDS = mongo_table(db=MGO_DATABASE, name='excavate_records')
|
|
|
-'''组织|单位'''
|
|
|
+'''组织|单位表'''
|
|
|
MGO_ORGS = mongo_table(db=MGO_DATABASE, name='retrieve_orgs')
|
|
|
'''搜索词'''
|
|
|
MGO_KEYWORDS = mongo_table(db=MGO_DATABASE, name='retrieve_keywords')
|
|
|
-'''种子url'''
|
|
|
+'''种子列表'''
|
|
|
MGO_URLS = mongo_table(db=MGO_DATABASE, name='retrieve_urls')
|
|
|
-'''竞品url'''
|
|
|
+'''竞品列表'''
|
|
|
MGO_COMPETING_GOODS = mongo_table(db=MGO_DATABASE, name='retrieve_competing_goods')
|
|
|
'''luaconfig'''
|
|
|
MGO_LUA_SPIDERS = mongo_table(db='editor', name='luaconfig')
|
|
@@ -37,7 +37,7 @@ FILTER_WORDS = [
|
|
|
'中止公告', '终止公告', '竞卖', '竞买', '论证', '拟建', '审批', '环评'
|
|
|
]
|
|
|
|
|
|
-'''搜索引擎需要过滤屏蔽的网址'''
|
|
|
+'''搜索引擎屏蔽的网址'''
|
|
|
ENGINE_FEATURE_RETRIEVES = [
|
|
|
'microsoft.com',
|
|
|
'cn.bing.com',
|