import bson
import pymongo
from elasticsearch import Elasticsearch

import setting
from utils.title_participle import get_should

# ---------------------------------- mongo ----------------------------------
# Cache of shared MongoClient instances, keyed by connection URI.
MONGO_URI_CLIENTS = {}


def mongo_client(cfg=None, host=None, port=None, fork=False, **kwargs):
    """Return a MongoClient for the requested server.

    The URI is built from ``host``/``port`` when BOTH are given; otherwise
    from ``cfg`` (a mapping with ``'host'`` and ``'port'`` keys), falling back
    to ``setting.MONGO_IP`` / ``setting.MONGO_PORT`` when ``cfg`` is falsy.

    :param cfg: optional mapping with ``'host'`` and ``'port'`` keys
    :param host: server host; only used when ``port`` is also given
    :param port: server port; only used when ``host`` is also given
    :param fork: when true, always build a fresh client and bypass the cache
        (presumably for use in a forked worker process -- confirm with callers)
    :param kwargs: forwarded to ``pymongo.MongoClient``
    :return: a ``pymongo.MongoClient`` instance

    NOTE: cached clients are keyed by URI only, so ``kwargs`` passed on a
    later call for an already-cached URI are ignored.
    """
    if host is not None and port is not None:
        uri = f'mongodb://{host}:{port}'
    else:
        _cfg = cfg or {'host': setting.MONGO_IP, 'port': setting.MONGO_PORT}
        uri = f'mongodb://{_cfg["host"]}:{_cfg["port"]}'

    if fork:
        return pymongo.MongoClient(uri, **kwargs)

    client = MONGO_URI_CLIENTS.get(uri)
    if client is None:
        # A constructor call never yields None, so the new client can be
        # cached unconditionally.
        client = pymongo.MongoClient(uri, **kwargs)
        MONGO_URI_CLIENTS[uri] = client
    return client


def mongo_database(name: str, **kwargs):
    """Return the database ``name``; ``kwargs`` are passed to ``mongo_client``."""
    client = mongo_client(**kwargs)
    return client.get_database(name)


def mongo_table(db: str, name: str, **kwargs):
    """Return collection ``name`` of database ``db``.

    ``kwargs`` are passed through ``mongo_database`` to ``mongo_client``.
    """
    database = mongo_database(db, **kwargs)
    return database.get_collection(name)


def int2long(param: int):
    """Convert an int to a BSON ``Int64`` (long)."""
    return bson.int64.Int64(param)


def object_id(_id: str):
    """Build a BSON ``ObjectId`` from ``_id``."""
    return bson.objectid.ObjectId(_id)


# ---------------------------------- es ----------------------------------
# Half-width of the publish-time search window used by es_query: 5 days,
# expressed in the same unit as the ``publish_time`` argument.
_ES_TIME_WINDOW = 432000


def es_client(cfg=None):
    """Create a new Elasticsearch client.

    :param cfg: optional mapping with ``'host'``, ``'port'``, ``'username'``
        and ``'pwd'`` keys; defaults to the ``setting.ES_*`` values
    :return: a new ``Elasticsearch`` instance (no caching is done here)
    """
    if cfg is None:
        cfg = {
            'host': setting.ES_IP,
            'username': setting.ES_USERNAME,
            'pwd': setting.ES_PASSWORD,
            'port': setting.ES_PORT,
        }
    hosts = [{'host': cfg['host'], 'port': cfg['port']}]
    auth = (cfg['username'], cfg['pwd'])
    return Elasticsearch(hosts, http_auth=auth)


def es_query(title: str, publish_time: int):
    """Count Elasticsearch documents matching a title near a publish time.

    Searches ``setting.ES_INDEX`` for documents whose ``publishtime`` lies
    within 5 days before or after ``publish_time`` and whose content matches
    the ``should`` clauses that ``get_should`` builds from the title.

    :param title: title to search for
    :param publish_time: publish time; presumably a Unix timestamp in
        seconds, given the 432000 (= 5 days in seconds) offset -- confirm
    :return: the total hit count reported by Elasticsearch, as an int
    """
    client = es_client()
    stime = publish_time - _ES_TIME_WINDOW  # go back 5 days
    etime = publish_time + _ES_TIME_WINDOW
    time_limit = {"range": {'publishtime': {"from": stime, "to": etime}}}
    # Tokenize the title and combine the tokens into query clauses.
    should_list = get_should(title)
    # Query by title clauses restricted to the publish-time range.
    query = {
        "query": {
            "bool": {
                "must": [time_limit],
                "should": should_list,
                "minimum_should_match": "10<90%",
            }
        }
    }
    result = client.search(index=setting.ES_INDEX, body=query, request_timeout=100)
    total = int(result['hits']['total']['value'])
    return total