@@ -0,0 +1,127 @@
+import bson
+import pymongo
+import redis
+import requests
+from elasticsearch import Elasticsearch
+
+from config.load import mongo_conf, redis_conf, es_conf, analyze_url
+
+# ---------------------------------- mongo ----------------------------------
+MONGO_URI_CLIENTS = {}  # cache of MongoClient instances, keyed by URI
+
+
+def mongo_client(cfg=None, host=None, port=None, fork=False, **kwargs):
+    """Return a MongoClient; clients are cached per URI unless fork=True."""
+    if host is not None and port is not None:
+        uri = f'mongodb://{host}:{port}'
+    else:
+        _cfg = (cfg or mongo_conf)
+        uri = f'mongodb://{_cfg["host"]}:{_cfg["port"]}'
+
+    if fork:
+        # fork=True always builds a fresh, uncached client (e.g. for child processes)
+        return pymongo.MongoClient(uri, **kwargs)
+    global MONGO_URI_CLIENTS
+    matched_client = MONGO_URI_CLIENTS.get(uri)
+    if matched_client is None:
+        new_client = pymongo.MongoClient(uri, **kwargs)
+        MONGO_URI_CLIENTS[uri] = new_client
+        return new_client
+    return matched_client
+
+
+def mongo_database(name: str, **kw):
+    client = mongo_client(**kw)
+    return client.get_database(name)
+
+
+def mongo_table(db: str, name: str, **kw):
+    database = mongo_database(db, **kw)
+    return database.get_collection(name)
+
+
+def int2long(param: int):
+    """Convert an int to a BSON 64-bit long."""
+    return bson.int64.Int64(param)
+
+
+def object_id(_id: str):
+    return bson.objectid.ObjectId(_id)
+
+
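+# Usage sketch (database/collection/field names below are hypothetical):
+#   coll = mongo_table('crawl', 'bidding')
+#   coll.insert_one({'publishtime': int2long(1700000000)})
+#   doc = coll.find_one({'_id': object_id('64b0f0c2a1b2c3d4e5f60718')})
+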
+# ---------------------------------- es ----------------------------------
+def es_client(cfg=None):
+    if cfg is None:
+        cfg = es_conf
+    return Elasticsearch([{"host": cfg['host'], "port": cfg['port']}])
+
+
+def es_participles_service(text: str):
+    """
+    Get the word-segmentation (analyzer token) list for a text.
+
+    :param text: text to be segmented
+    :return: list of tokens
+    """
+    result = []
+    params = {"text": text, "analyzer": "ik_smart"}
+    res = requests.get(analyze_url, params=params, timeout=60)
+    if res.status_code == 200:
+        tokens = res.json().get('tokens', [])
+        for x in tokens:
+            if x["token"].encode('utf-8').isalpha():
+                # skip pure-ASCII alphabetic tokens
+                continue
+            result.append(x["token"])
+    return result
+
+
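+# analyze_url is assumed to point at an Elasticsearch _analyze endpoint whose JSON
+# response has the shape {"tokens": [{"token": "...", ...}, ...]} used above, e.g.:
+#   words = es_participles_service('某省政府采购公告')  # illustrative input
+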
+def es_query(title: str, publish_time: int):
+    """
+    Query es for similar titles around a publish time.
+
+    :param title: title
+    :param publish_time: publish time (unix timestamp, seconds)
+    :return: number of matching documents returned
+    """
+    client = es_client()
+    stime = publish_time - 432000  # 5 days earlier
+    etime = publish_time + 432000  # 5 days later
+    conditions = []
+    participles = es_participles_service(title)
+    for word in participles:
+        conditions.append({
+            "multi_match": {
+                "query": word,
+                "type": "phrase",
+                "fields": ["title"]
+            }
+        })
+    conditions.append({
+        "range": {"publishtime": {"from": stime, "to": etime}}
+    })
+    query = {
+        "query": {
+            "bool": {
+                "must": conditions,
+                "minimum_should_match": 1
+            }
+        }
+    }
+    result = client.search(index='bidding', body=query, request_timeout=100)
+    count = len(result['hits']['hits'])
+    return count
+
+
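+# Usage sketch (the 'bidding' index and 'title'/'publishtime' fields come from the
+# query above; the title and timestamp below are illustrative):
+#   hits = es_query('某某项目招标公告', 1700000000)
+#   print(hits)  # number of similar titles within +/- 5 days
+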
+# ---------------------------------- redis ----------------------------------
+def redis_client(cfg=None, host=None, port=None, db=None, password=None):
+    if all([host is not None, port is not None, db is not None]):
+        pwd = password
+    else:
+        _cfg = (cfg or redis_conf)
+        host, port, pwd, db = _cfg['host'], _cfg['port'], _cfg['pwd'], _cfg['db']
+    pool = redis.ConnectionPool(
+        host=host,
+        port=port,
+        password=pwd,
+        db=db,
+        # decode_responses must be set on the pool; it is ignored by Redis()
+        # when an explicit connection_pool is passed
+        decode_responses=True
+    )
+    return redis.Redis(connection_pool=pool)
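+
+
+# Usage sketch (assumes redis_conf provides 'host'/'port'/'pwd'/'db'; key name is hypothetical):
+#   r = redis_client()
+#   r.set('dup_filter:last_run', '1700000000')
+#   print(r.get('dup_filter:last_run'))  # returns str because decode_responses=True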