|
@@ -5,12 +5,16 @@ from common.databases import mongo_table, redis_client
|
|
|
MGO_DATABASE = 'shujuziyuan'
|
|
|
'''判重库'''
|
|
|
MGO_REPETITION = mongo_table(db=MGO_DATABASE, name='repetition_url')
|
|
|
-'''结果'''
|
|
|
-MGO_RECORDS = mongo_table(db=MGO_DATABASE, name='records')
|
|
|
-'''搜索|组织|单位'''
|
|
|
-MGO_VISIT_ORGANIZATION = mongo_table(db=MGO_DATABASE, name='visit_organization')
|
|
|
-'''搜索|关键词'''
|
|
|
-MGO_VISIT_KEYWORDS = mongo_table(db=MGO_DATABASE, name='visit_keywords')
|
|
|
+'''去重域名表'''
|
|
|
+MGO_REMOVAL_DOMAIN = mongo_table(db=MGO_DATABASE, name='removal_duplicate_domains')
|
|
|
+'''新发现的域名'''
|
|
|
+MGO_DOMAIN = mongo_table(db=MGO_DATABASE, name='new_domains')
|
|
|
+'''搜索【组织|单位】'''
|
|
|
+MGO_SEED_ORGS = mongo_table(db=MGO_DATABASE, name='seed_organizations')
|
|
|
+'''搜索关键词'''
|
|
|
+MGO_SEED_KEYWORDS = mongo_table(db=MGO_DATABASE, name='seed_keywords')
|
|
|
+'''种子urls'''
|
|
|
+MGO_SEED_URLS = mongo_table(db=MGO_DATABASE, name='seed_urls')
|
|
|
'''redis'''
|
|
|
REDIS = redis_client()
|
|
|
REDIS_KEY = 'retrieve_urls'
|
|
@@ -21,7 +25,7 @@ REQUIREMENT_PHRASE = [
|
|
|
'议价', '中选', '答疑', '合同', '竞价', '变更', '更正', '预告', '集采', '抽取', '抽签',
|
|
|
'中止公告', '终止公告', '竞卖', '竞买', '论证', '拟建', '审批', '环评'
|
|
|
]
|
|
|
-'''无用词组|广告|涉黄信息|涉黑信息|垃圾'''
|
|
|
+'''需要剔除或者过滤的关键字'''
|
|
|
SENSITIVE_WORDS = [
|
|
|
'通知', '邮箱', '登录'
|
|
|
]
|