12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- # -*- coding: utf-8 -*-
- """
- Created on 2023-12-25
- ---------
- @summary: es
- ---------
- @author: Lzz
- """
- import sys
- import os
- sys.path.append(os.path.dirname(os.getcwd()))
- from elasticsearch import Elasticsearch
- from utils.title_participle import get_should
- # es:
- # host: 172.17.4.184
- # usename: "jybid"
- # pwd: "Top2023_JEB01i@31"
- # port: !!int 19905
- # db: biddingall # alias of the ES index
def es_client():
    """
    Build an Elasticsearch client for the configured cluster.

    Connection settings are read from the environment variables ``ES_HOST``,
    ``ES_PORT``, ``ES_USERNAME`` and ``ES_PASSWORD``. Any variable that is not
    set falls back to the value that was previously hard-coded here, so
    existing deployments keep working without any configuration change.

    :return: an ``Elasticsearch`` client authenticated with ``http_auth``
    """
    # NOTE(review): the fallback credentials below are still committed to
    # source control; move them to the environment / a secret store and
    # rotate them, then drop the defaults.
    host = os.environ.get("ES_HOST", "172.17.4.184")
    port = int(os.environ.get("ES_PORT", 19905))
    username = os.environ.get("ES_USERNAME", "jybid")
    password = os.environ.get("ES_PASSWORD", "Top2023_JEB01i@31")
    return Elasticsearch(
        [{"host": host, "port": port}],
        http_auth=(username, password),
    )
def es_search(title: str, publish_time: int):
    """
    Count the documents in the ``biddingall`` index that match ``title`` and
    were published within five days of ``publish_time``.

    :param title: title to search for; ``get_should`` tokenises it into the
        ``should`` clauses of the query
    :param publish_time: publish time; the search window is this value
        minus/plus 432000 (presumably epoch seconds, i.e. 5 days — confirm
        against the ``publishtime`` field mapping)
    :return: ``hits.total.value`` reported by Elasticsearch, as an int
    """
    window = 432000  # 5 days in seconds, applied on both sides

    client = es_client()
    time_limit = {
        "range": {
            "publishtime": {
                "from": publish_time - window,
                "to": publish_time + window,
            }
        }
    }
    # Tokenise the title and turn the tokens into query clauses.
    should_list = get_should(title)

    # Filter by publish-time window and match on the title clauses.
    query = {
        # Only the hit count is used, so do not fetch any documents.
        "size": 0,
        "query": {
            "bool": {
                "must": [time_limit],
                "should": should_list,
                # Up to 10 clauses: all must match; more than 10: 90% of them.
                "minimum_should_match": "10<90%",
            }
        },
    }
    result = client.search(index="biddingall", body=query, request_timeout=100)
    # NOTE(review): unless track_total_hits is enabled, Elasticsearch 7+
    # caps this value at 10000 — confirm callers only need a lower bound.
    return int(result['hits']['total']['value'])
|