|
@@ -6,7 +6,7 @@ MGO_DATABASE = 'shujuziyuan'
|
|
|
'''垃圾表'''
|
|
|
MGO_REMOVAL_DUPLICATE = mongo_table(db=MGO_DATABASE, name='removal_duplicate')
|
|
|
'''数据挖掘到的结果'''
|
|
|
-MGO_DOMAIN = mongo_table(db=MGO_DATABASE, name='data_excavate')
|
|
|
+MGO_DOMAIN = mongo_table(db=MGO_DATABASE, name='new_domains')
|
|
|
'''数据查询到的单位组织|关键词'''
|
|
|
MGO_QUERY = mongo_table(db=MGO_DATABASE, name='data_query')
|
|
|
'''数据采集记录'''
|
|
@@ -21,20 +21,23 @@ MGO_URLS = mongo_table(db=MGO_DATABASE, name='retrieve_urls')
|
|
|
MGO_COMPETING_GOODS = mongo_table(db=MGO_DATABASE, name='retrieve_competing_goods')
|
|
|
'''luaconfig'''
|
|
|
MGO_LUA_SPIDERS = mongo_table(db='editor', name='luaconfig')
|
|
|
+
|
|
|
'''redis'''
|
|
|
REDIS = redis_client()
|
|
|
-'''词组查询redis队列'''
|
|
|
-REDIS_QUERY = 'retrieve_query'
|
|
|
-'''数据挖掘redis队列'''
|
|
|
-REDIS_EXCAVATE = 'retrieve_excavate'
|
|
|
-'''关键词'''
|
|
|
-REQUIREMENT_PHRASE = [
|
|
|
+'''redis键名前缀'''
|
|
|
+REDIS_QUERY_KEYWORD = 'query_keyword'
|
|
|
+REDIS_QUERY_ORGS = 'query_org'
|
|
|
+REDIS_EXCAVATE = 'data_excavate'
|
|
|
+
|
|
|
+'''过滤词'''
|
|
|
+FILTER_WORDS = [
|
|
|
'竞谈', '发包', '比价', '开标', '邀标', '采购', '招标', '中标', '废标', '成交', '单一', '询价',
|
|
|
'项目结果', '邀请', '磋商', '流标', '谈判', '竞争', '遴选', '比选', '招募', '评标', '资格预审',
|
|
|
'议价', '中选', '答疑', '合同', '竞价', '变更', '更正', '预告', '集采', '抽取', '抽签',
|
|
|
'中止公告', '终止公告', '竞卖', '竞买', '论证', '拟建', '审批', '环评'
|
|
|
]
|
|
|
-'''搜索引擎过滤特征'''
|
|
|
+
|
|
|
+'''搜索引擎需要过滤屏蔽的网址'''
|
|
|
ENGINE_FEATURE_RETRIEVES = [
|
|
|
'microsoft.com',
|
|
|
'cn.bing.com',
|
|
@@ -42,7 +45,3 @@ ENGINE_FEATURE_RETRIEVES = [
|
|
|
'beian.gov.cn/portal/registerSystemInfo',
|
|
|
'baike.baidu.com'
|
|
|
]
|
|
|
-'''特殊编码'''
|
|
|
-SPECIAL_ENCODINGS = [
|
|
|
- 'Windows-1254'
|
|
|
-]
|