|
@@ -3,15 +3,23 @@ from common.databases import mongo_table, redis_client
|
|
|
|
|
|
'''Mongo'''
|
|
|
MGO_DATABASE = 'shujuziyuan'
|
|
|
-'''判重样本'''
|
|
|
-MGO_REPETITION = mongo_table(
|
|
|
- host='192.168.3.167',
|
|
|
- port='27082',
|
|
|
- db=MGO_DATABASE,
|
|
|
- name='repetition_url'
|
|
|
-)
|
|
|
-'''关键词组与域名的集合表'''
|
|
|
-MGO_SEARCH = mongo_table(MGO_DATABASE, 'visit')
|
|
|
+'''判重库'''  # label (zh): "deduplication store"
|
|
|
+MGO_REPETITION = mongo_table(db=MGO_DATABASE, name='repetition_url')  # 'repetition_url' in MGO_DATABASE; presumably a collection handle - confirm against common.databases.mongo_table
|
|
|
+'''结果'''  # label (zh): "results"
|
|
|
+MGO_RECORDS = mongo_table(db=MGO_DATABASE, name='records')  # 'records' in MGO_DATABASE, opened the same way as MGO_REPETITION
|
|
|
+'''搜索'''  # label (zh): "search"
|
|
|
+MGO_VISIT = mongo_table(db=MGO_DATABASE, name='visit')  # NOTE(review): replaces the former MGO_SEARCH name for the same 'visit' table - confirm no importer still uses MGO_SEARCH
|
|
|
'''redis'''
|
|
|
REDIS = redis_client()
|
|
|
-REDIS_KEY = 'spider_visit'
|
|
|
+REDIS_KEY = 'retrieve_urls'  # presumably the key used with REDIS - confirm at call sites; NOTE(review): value was 'spider_visit', check nothing still relies on the old key
|
|
|
+'''关键词'''  # label (zh): "keywords"
|
|
|
+REQUIREMENT_PHRASE = [  # Chinese tendering/procurement terms (e.g. '招标' tender, '中标' bid award, '采购' procurement); how they are matched is not visible here - TODO confirm
|
|
|
+ '竞谈', '发包', '比价', '开标', '邀标', '采购', '招标', '中标', '废标', '成交', '单一', '询价',
|
|
|
+ '项目结果', '邀请', '磋商', '流标', '谈判', '竞争', '遴选', '比选', '招募', '评标', '资格预审',
|
|
|
+ '议价', '中选', '答疑', '合同', '竞价', '变更', '更正', '预告', '集采', '抽取', '抽签',
|
|
|
+ '中止公告', '终止公告', '竞卖', '竞买', '论证', '拟建', '审批', '环评'
|
|
|
+]
|
|
|
+'''没用信息|广告|垃圾消息|敏感词'''  # label (zh): "useless info | ads | spam | sensitive words"
|
|
|
+SENSITIVE_WORDS = [  # terms grouped under the label above; presumably used to filter out unwanted items - confirm at call sites
|
|
|
+ '通知'  # zh: "notice"
|
|
|
+]
|