dongzhaorui 3 jaren geleden
bovenliggende
commit
07a6a61676
1 gewijzigde bestanden met toevoegingen van 11 en 12 verwijderingen
  1. 11 12
      find_source/settings.py

+ 11 - 12
find_source/settings.py

@@ -6,7 +6,7 @@ MGO_DATABASE = 'shujuziyuan'
 '''垃圾表'''
 MGO_REMOVAL_DUPLICATE = mongo_table(db=MGO_DATABASE, name='removal_duplicate')
 '''数据挖掘到的结果'''
-MGO_DOMAIN = mongo_table(db=MGO_DATABASE, name='data_excavate')
+MGO_DOMAIN = mongo_table(db=MGO_DATABASE, name='new_domains')
 '''数据查询到的单位组织|关键词'''
 MGO_QUERY = mongo_table(db=MGO_DATABASE, name='data_query')
 '''数据采集记录'''
@@ -21,20 +21,23 @@ MGO_URLS = mongo_table(db=MGO_DATABASE, name='retrieve_urls')
 MGO_COMPETING_GOODS = mongo_table(db=MGO_DATABASE, name='retrieve_competing_goods')
 '''luaconfig'''
 MGO_LUA_SPIDERS = mongo_table(db='editor', name='luaconfig')
+
 '''redis'''
 REDIS = redis_client()
-'''词组查询redis队列'''
-REDIS_QUERY = 'retrieve_query'
-'''数据挖掘redis队列'''
-REDIS_EXCAVATE = 'retrieve_excavate'
-'''关键词'''
-REQUIREMENT_PHRASE = [
+'''redis键名前缀'''
+REDIS_QUERY_KEYWORD = 'query_keyword'
+REDIS_QUERY_ORGS = 'query_org'
+REDIS_EXCAVATE = 'data_excavate'
+
+'''过滤词'''
+FILTER_WORDS = [
     '竞谈', '发包', '比价', '开标', '邀标', '采购', '招标', '中标', '废标', '成交', '单一', '询价',
     '项目结果', '邀请', '磋商', '流标', '谈判', '竞争', '遴选', '比选', '招募', '评标', '资格预审',
     '议价', '中选', '答疑', '合同', '竞价', '变更', '更正', '预告', '集采', '抽取', '抽签',
     '中止公告', '终止公告', '竞卖', '竞买', '论证', '拟建', '审批', '环评'
 ]
-'''搜索引擎过滤特征'''
+
+'''搜索引擎需要过滤屏蔽的网址'''
 ENGINE_FEATURE_RETRIEVES = [
     'microsoft.com',
     'cn.bing.com',
@@ -42,7 +45,3 @@ ENGINE_FEATURE_RETRIEVES = [
     'beian.gov.cn/portal/registerSystemInfo',
     'baike.baidu.com'
 ]
-'''特殊编码'''
-SPECIAL_ENCODINGS = [
-    'Windows-1254'
-]