dongzhaorui 3 rokov pred
rodič
commit
25d3f30778
2 zmenil súbory, kde vykonal 41 pridanie a 52 odobranie
  1. 0 2
      qlm/config/load.py
  2. 41 50
      qlm/utils/databases.py

+ 0 - 2
qlm/config/load.py

@@ -9,7 +9,6 @@ __all__ = [
     'jy_proxy',
     'es_conf',
     'headers',
-    'analyze_url',
     'node_module_path'
 ]
 
@@ -30,7 +29,6 @@ with open(_yaml_conf, encoding="utf-8") as f:
 with open(_yaml_constants, encoding="utf-8") as fp:
     constants = yaml.safe_load(fp)
     headers: dict = constants['headers']
-    analyze_url = f'http://{es_conf["host"]}:{es_conf["port"]}/{es_conf["db"]}/_analyze'
 
 
 node_module_path = _node_modules

+ 41 - 50
qlm/utils/databases.py

@@ -1,27 +1,41 @@
 import bson
 import pymongo
 import redis
-import requests
 from elasticsearch import Elasticsearch
 
-from config.load import mongo_conf, redis_conf, es_conf, analyze_url
-
+from config.load import mongo_conf, redis_conf, es_conf
 
 # ---------------------------------- mongo ----------------------------------
-def mongo_client(cfg=None):
-    if cfg is None:
-        cfg = mongo_conf
-    return pymongo.MongoClient(host=cfg['host'], port=cfg['port'])
+MONGO_URI_CLIENTS = {}    # a dictionary hold all client with uri as key
+
 
+def mongo_client(cfg=None, host=None, port=None, fork=False, **kwargs):
+    if host is not None and port is not None:
+        uri = f'mongodb://{host}:{port}'
+    else:
+        _cfg = (cfg or mongo_conf)
+        uri = f'mongodb://{_cfg["host"]}:{_cfg["port"]}'
 
-def mongo_database(db: str):
-    client = mongo_client()
-    return client[db]
+    if fork:
+        return pymongo.MongoClient(uri, **kwargs)
+    global MONGO_URI_CLIENTS
+    matched_client = MONGO_URI_CLIENTS.get(uri)
+    if matched_client is None:
+        new_client = pymongo.MongoClient(uri, **kwargs)
+        if new_client is not None:
+            MONGO_URI_CLIENTS[uri] = new_client
+        return new_client
+    return matched_client
 
 
-def mongo_table(db: str, coll: str):
-    client = mongo_client()
-    return client[db][coll]
+def mongo_database(name: str, **kw):
+    client = mongo_client(**kw)
+    return client.get_database(name)
+
+
+def mongo_table(db: str, name: str, **kw):
+    database = mongo_database(db, **kw)
+    return database.get_collection(name)
 
 
 def int2long(param: int):
@@ -40,25 +54,6 @@ def es_client(cfg=None):
     return Elasticsearch([{"host": cfg['host'], "port": cfg['port']}])
 
 
-def es_participles_service(text: str):
-    """
-    获取文本的分词列表
-
-    :param text: 需要分词的文本
-    :return: 分词列表
-    """
-    result = []
-    params = {"text": text, "analyzer": "ik_smart"}
-    res = requests.get(analyze_url, params=params, timeout=60)
-    if res.status_code == 200:
-        tokens = res.json().get('tokens', [])
-        for x in tokens:
-            if x["token"].encode('utf-8').isalpha():
-                continue
-            result.append(x["token"])
-    return result
-
-
 def es_query(title: str, publish_time: int):
     """
     查询es
@@ -70,30 +65,26 @@ def es_query(title: str, publish_time: int):
     client = es_client()
     stime = publish_time - 432000  # 往前推5天
     etime = publish_time + 432000
-    conditions = []
-    participles = es_participles_service(title)
-    for word in participles:
-        conditions.append({
-            "multi_match": {
-                "query": word,
-                "type": "phrase",
-                "fields": ["title"]
-            }
-        })
-    conditions.append({
-        "range": {"publishtime": {"from": stime, "to": etime}}
-    })
+    # 通过发布标题和发布时间范围查询
     query = {
         "query": {
             "bool": {
-                "must": conditions,
-                "minimum_should_match": 1
+                "must": [
+                    {
+                        "multi_match": {
+                            "query": title,
+                            "type": "phrase",
+                            "fields": ["title"]
+                        }
+                    },
+                    {"range": {'publishtime': {"from": stime, "to": etime}}}
+                ]
             }
         }
     }
-    result = client.search(index='bidding', body=query, request_timeout=100)
-    count = len(result['hits']['hits'])
-    return count
+    result = client.search(index=es_conf['db'], body=query, request_timeout=100)
+    total = int(result['hits']['total'])
+    return total
 
 
 # ---------------------------------- redis ----------------------------------