浏览代码

元博网 - 修改 es 查询

dongzhaorui 3 年之前
父节点
当前提交
2c284cd37c
共有 2 个文件被更改,包括 14 次插入和 38 次删除
  1. 0 2
      ybw/config/load.py
  2. 14 36
      ybw/utils/databases.py

+ 0 - 2
ybw/config/load.py

@@ -12,7 +12,6 @@ __all__ = [
     'jy_proxy',
     'jy_proxy',
     'crawler_url',
     'crawler_url',
     'region',
     'region',
-    'analyze_url',
     'node_module_path'
     'node_module_path'
 ]
 ]
 
 
@@ -35,7 +34,6 @@ with open(_yaml_constants, encoding="utf-8") as fp:
     constants = yaml.safe_load(fp)
     constants = yaml.safe_load(fp)
     headers: dict = constants['headers']
     headers: dict = constants['headers']
     crawler_url: dict = constants['crawler_url']
     crawler_url: dict = constants['crawler_url']
-    analyze_url = f'http://{es_conf["host"]}:{es_conf["port"]}/{es_conf["db"]}/_analyze'
 
 
 with open(_yaml_areas, encoding="utf-8") as fr:
 with open(_yaml_areas, encoding="utf-8") as fr:
     areas = yaml.safe_load(fr)
     areas = yaml.safe_load(fr)

+ 14 - 36
ybw/utils/databases.py

@@ -1,10 +1,9 @@
 import bson
 import bson
 import pymongo
 import pymongo
 import redis
 import redis
-import requests
 from elasticsearch import Elasticsearch
 from elasticsearch import Elasticsearch
 
 
-from config.load import mongo_conf, redis_conf, es_conf, analyze_url
+from config.load import mongo_conf, redis_conf, es_conf
 
 
 # ---------------------------------- mongo ----------------------------------
 # ---------------------------------- mongo ----------------------------------
 MONGO_URI_CLIENTS = {}    # a dictionary hold all client with uri as key
 MONGO_URI_CLIENTS = {}    # a dictionary hold all client with uri as key
@@ -55,25 +54,6 @@ def es_client(cfg=None):
     return Elasticsearch([{"host": cfg['host'], "port": cfg['port']}])
     return Elasticsearch([{"host": cfg['host'], "port": cfg['port']}])
 
 
 
 
-def es_participles_service(text: str):
-    """
-    获取文本的分词列表
-
-    :param text: 需要分词的文本
-    :return: 分词列表
-    """
-    result = []
-    params = {"text": text, "analyzer": "ik_smart"}
-    res = requests.get(analyze_url, params=params, timeout=60)
-    if res.status_code == 200:
-        tokens = res.json().get('tokens', [])
-        for x in tokens:
-            if x["token"].encode('utf-8').isalpha():
-                continue
-            result.append(x["token"])
-    return result
-
-
 def es_query(title: str, publish_time: int):
 def es_query(title: str, publish_time: int):
     """
     """
     查询es
     查询es
@@ -85,28 +65,26 @@ def es_query(title: str, publish_time: int):
     client = es_client()
     client = es_client()
     stime = publish_time - 432000  # 往前推5天
     stime = publish_time - 432000  # 往前推5天
     etime = publish_time + 432000
     etime = publish_time + 432000
-    conditions = []
-    participles = es_participles_service(title)
-    for word in participles:
-        conditions.append({
-            "multi_match": {
-                "query": word,
-                "type": "phrase",
-                "fields": ["title"]
-            }
-        })
-    conditions.append({"range": {"publishtime": {"from": stime, "to": etime}}})
+    # 通过发布标题和发布时间范围查询
     query = {
     query = {
         "query": {
         "query": {
             "bool": {
             "bool": {
-                "must": conditions,
-                "minimum_should_match": 1
+                "must": [
+                    {
+                        "multi_match": {
+                            "query": title,
+                            "type": "phrase",
+                            "fields": ["title"]
+                        }
+                    },
+                    {"range": {'publishtime': {"from": stime, "to": etime}}}
+                ]
             }
             }
         }
         }
     }
     }
     result = client.search(index=es_conf['db'], body=query, request_timeout=100)
     result = client.search(index=es_conf['db'], body=query, request_timeout=100)
-    count = len(result['hits']['hits'])
-    return count
+    total = int(result['hits']['total'])
+    return total
 
 
 
 
 # ---------------------------------- redis ----------------------------------
 # ---------------------------------- redis ----------------------------------