|
@@ -1,46 +0,0 @@
|
|
|
-import requests
|
|
|
-from elasticsearch import Elasticsearch
|
|
|
-
|
|
|
-from config.load import es_conf
|
|
|
-
|
|
|
# Shared Elasticsearch client for this module; host and port are read from es_conf.
es = Elasticsearch([dict(host=es_conf['host'], port=es_conf['port'])])
|
|
|
-
|
|
|
-
|
|
|
def httpAz(title):
    """Turn the analyzed tokens of ``title`` into phrase-match clauses.

    Sends ``title`` to the ``_analyze`` endpoint of the ``bidding`` index
    using the ``ik_smart`` analyzer, then wraps every kept token in a
    ``multi_match`` phrase clause on the ``title`` field.

    Tokens whose UTF-8 encoding is entirely alphabetic according to
    ``isalpha`` are dropped (on Python 3 that means ASCII letters only).

    :param title: text to tokenize
    :return: a new list of ``multi_match`` clause dicts; empty when the
        response status is not 200 or no token survives the filter
    """
    endpoint = "http://{}:{}/bidding/_analyze".format(es_conf['host'], es_conf['port'])
    # TODO: check whether slow Elasticsearch responses here hurt efficiency.
    response = requests.get(
        url=endpoint,
        params={"text": title, "analyzer": "ik_smart"},
        timeout=60,
    )
    if response.status_code != 200:
        # Any non-200 answer yields no clauses at all.
        return []

    kept_tokens = [
        entry["token"]
        for entry in response.json().get('tokens', [])
        if not entry["token"].encode('utf-8').isalpha()
    ]
    return [
        {"multi_match": {"query": word, "type": "phrase", "fields": ["title"]}}
        for word in kept_tokens
    ]
|
|
|
-
|
|
|
-
|
|
|
def get_es(title, publishtime):
    """Count ``bidding`` hits matching ``title`` around ``publishtime``.

    :param title: title text; ``httpAz`` converts it into phrase clauses
    :param publishtime: publish time as a number; the search window is
        ``publishtime`` +/- 432000 (five days, assuming the value is in
        seconds)
    :return: number of hits contained in the search response
    """
    five_days = 5 * 24 * 60 * 60  # 432000
    # Bounds are computed before the analyze request, so a non-numeric
    # publishtime fails before any network call is made.
    lower_bound = publishtime - five_days
    upper_bound = publishtime + five_days

    must_clauses = httpAz(title)
    must_clauses.append(
        {"range": {"publishtime": {"from": lower_bound, "to": upper_bound}}}
    )

    # NOTE(review): minimum_should_match is set although the query has no
    # "should" clause -- confirm this is intended.
    bool_query = {"must": must_clauses, "minimum_should_match": 1}
    response = es.search(
        index='bidding',
        body={"query": {"bool": bool_query}},
        request_timeout=100,
    )
    # NOTE(review): this counts only the hits returned in the response; no
    # explicit size is requested, so it is presumably capped by the server's
    # default page size -- confirm whether the total hit count is wanted.
    return len(response['hits']['hits'])
|