es_query.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
# -*- coding: utf-8 -*-
"""
Created on 2023-12-25
---------
@summary: Elasticsearch helpers - count documents whose title matches within a publish-time window
---------
@author: Lzz
"""
  9. import sys
  10. import os
  11. sys.path.append(os.path.dirname(os.getcwd()))
  12. from elasticsearch import Elasticsearch
  13. from utils.title_participle import get_should
  14. # es:
  15. # host: 172.17.4.184
  16. # usename: "jybid"
  17. # pwd: "Top2023_JEB01i@31"
  18. # port: !!int 19905
  19. # db: biddingall # es库别名
  20. def es_client():
  21. cfg = {"host": "172.17.4.184",
  22. "port": 19905,
  23. "usename": "jybid",
  24. "pwd": "Top2023_JEB01i@31"}
  25. return Elasticsearch([{"host": cfg['host'], "port": cfg['port']}],http_auth=(cfg['usename'], cfg['pwd']))
  26. def es_search(title: str, publish_time: int):
  27. """
  28. 查询es
  29. :param title: 标题
  30. :param publish_time: 发布时间
  31. :return:
  32. """
  33. client = es_client()
  34. stime = publish_time - 432000 # 往前推5天
  35. etime = publish_time + 432000
  36. time_limit = {"range": {"publishtime": {"from": stime, "to": etime}}}
  37. should_list = get_should(title) # 对标题进行分词组合query语句
  38. # 通过发布标题和发布时间范围查询
  39. query = {
  40. "query": {
  41. "bool": {
  42. "must": [time_limit],
  43. "should": should_list,
  44. "minimum_should_match": "10<90%",
  45. }
  46. }
  47. }
  48. result = client.search(index="biddingall", body=query, request_timeout=100)
  49. total = int(result['hits']['total']['value'])
  50. return total