3 rokov pred · 25d3f30778
--- a/qlm/config/load.py
+++ b/qlm/config/load.py
@@ -9,7 +9,6 @@ __all__ = [
 
				     'jy_proxy',
			
 
				     'es_conf',
			
 
				     'headers',
			
 
				-    'analyze_url',
			
 
				     'node_module_path'
			
 
				 ]
			
 
				 
			
@@ -30,7 +29,6 @@ with open(_yaml_conf, encoding="utf-8") as f:
 
				 with open(_yaml_constants, encoding="utf-8") as fp:
			
 
				     constants = yaml.safe_load(fp)
			
 
				     headers: dict = constants['headers']
			
 
				-    analyze_url = f'http://{es_conf["host"]}:{es_conf["port"]}/{es_conf["db"]}/_analyze'
			
 
				 
			
 
				 
			
 
				 node_module_path = _node_modules
			
--- a/qlm/utils/databases.py
+++ b/qlm/utils/databases.py
@@ -1,27 +1,41 @@
 
				 import bson
			
 
				 import pymongo
			
 
				 import redis
			
 
				-import requests
			
 
				 from elasticsearch import Elasticsearch
			
 
				 
			
 
				-from config.load import mongo_conf, redis_conf, es_conf, analyze_url
			
 
				-
			
 
				+from config.load import mongo_conf, redis_conf, es_conf
			
 
				 
			
 
				 # ---------------------------------- mongo ----------------------------------
			
 
				-def mongo_client(cfg=None):
			
 
				-    if cfg is None:
			
 
				-        cfg = mongo_conf
			
 
				-    return pymongo.MongoClient(host=cfg['host'], port=cfg['port'])
			
 
				+MONGO_URI_CLIENTS = {}    # a dictionary hold all client with uri as key
			
 
				+
			
 
				 
			
 
				+def mongo_client(cfg=None, host=None, port=None, fork=False, **kwargs):
			
 
				+    if host is not None and port is not None:
			
 
				+        uri = f'mongodb://{host}:{port}'
			
 
				+    else:
			
 
				+        _cfg = (cfg or mongo_conf)
			
 
				+        uri = f'mongodb://{_cfg["host"]}:{_cfg["port"]}'
			
 
				 
			
 
				-def mongo_database(db: str):
			
 
				-    client = mongo_client()
			
 
				-    return client[db]
			
 
				+    if fork:
			
 
				+        return pymongo.MongoClient(uri, **kwargs)
			
 
				+    global MONGO_URI_CLIENTS
			
 
				+    matched_client = MONGO_URI_CLIENTS.get(uri)
			
 
				+    if matched_client is None:
			
 
				+        new_client = pymongo.MongoClient(uri, **kwargs)
			
 
				+        if new_client is not None:
			
 
				+            MONGO_URI_CLIENTS[uri] = new_client
			
 
				+        return new_client
			
 
				+    return matched_client
			
 
				 
			
 
				 
			
 
				-def mongo_table(db: str, coll: str):
			
 
				-    client = mongo_client()
			
 
				-    return client[db][coll]
			
 
				+def mongo_database(name: str, **kw):
			
 
				+    client = mongo_client(**kw)
			
 
				+    return client.get_database(name)
			
 
				+
			
 
				+
			
 
				+def mongo_table(db: str, name: str, **kw):
			
 
				+    database = mongo_database(db, **kw)
			
 
				+    return database.get_collection(name)
			
 
				 
			
 
				 
			
 
				 def int2long(param: int):
			
@@ -40,25 +54,6 @@ def es_client(cfg=None):
 
				     return Elasticsearch([{"host": cfg['host'], "port": cfg['port']}])
			
 
				 
			
 
				 
			
 
				-def es_participles_service(text: str):
			
 
				-    """
			
 
				-    获取文本的分词列表
			
 
				-
			
 
				-    :param text: 需要分词的文本
			
 
				-    :return: 分词列表
			
 
				-    """
			
 
				-    result = []
			
 
				-    params = {"text": text, "analyzer": "ik_smart"}
			
 
				-    res = requests.get(analyze_url, params=params, timeout=60)
			
 
				-    if res.status_code == 200:
			
 
				-        tokens = res.json().get('tokens', [])
			
 
				-        for x in tokens:
			
 
				-            if x["token"].encode('utf-8').isalpha():
			
 
				-                continue
			
 
				-            result.append(x["token"])
			
 
				-    return result
			
 
				-
			
 
				-
			
 
				 def es_query(title: str, publish_time: int):
			
 
				     """
			
 
				     查询es
			
@@ -70,30 +65,26 @@ def es_query(title: str, publish_time: int):
 
				     client = es_client()
			
 
				     stime = publish_time - 432000  # 往前推5天
			
 
				     etime = publish_time + 432000
			
 
				-    conditions = []
			
 
				-    participles = es_participles_service(title)
			
 
				-    for word in participles:
			
 
				-        conditions.append({
			
 
				-            "multi_match": {
			
 
				-                "query": word,
			
 
				-                "type": "phrase",
			
 
				-                "fields": ["title"]
			
 
				-            }
			
 
				-        })
			
 
				-    conditions.append({
			
 
				-        "range": {"publishtime": {"from": stime, "to": etime}}
			
 
				-    })
			
 
				+    # 通过发布标题和发布时间范围查询
			
 
				     query = {
			
 
				         "query": {
			
 
				             "bool": {
			
 
				-                "must": conditions,
			
 
				-                "minimum_should_match": 1
			
 
				+                "must": [
			
 
				+                    {
			
 
				+                        "multi_match": {
			
 
				+                            "query": title,
			
 
				+                            "type": "phrase",
			
 
				+                            "fields": ["title"]
			
 
				+                        }
			
 
				+                    },
			
 
				+                    {"range": {'publishtime': {"from": stime, "to": etime}}}
			
 
				+                ]
			
 
				             }
			
 
				         }
			
 
				     }
			
 
				-    result = client.search(index='bidding', body=query, request_timeout=100)
			
 
				-    count = len(result['hits']['hits'])
			
 
				-    return count
			
 
				+    result = client.search(index=es_conf['db'], body=query, request_timeout=100)
			
 
				+    total = int(result['hits']['total'])
			
 
				+    return total
			
 
				 
			
 
				 
			
 
				 # ---------------------------------- redis ----------------------------------