title_participle.py 1013 B

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on 2023-10-10
  4. ---------
  5. @summary: 标题分词,组合es查询语句
  6. ---------
  7. @author: Lzz
  8. """
  9. import json
  10. import requests
  11. from requests.auth import HTTPBasicAuth
  12. import setting
  13. def get_should(title):
  14. url = setting.WORD_SEGMENTATION_API
  15. auth = HTTPBasicAuth(setting.ES_USERNAME, setting.ES_PASSWORD)
  16. headers = {"Content-Type": "application/json"}
  17. data = {"analyzer": "ik_smart", "text": title}
  18. res = requests.post(url, headers=headers, auth=auth, json=data, timeout=10)
  19. try:
  20. res_text = json.loads(res.text).get('tokens') or [{"token": title}]
  21. except:
  22. res_text = [{"token": title}]
  23. should_list = []
  24. for key in res_text:
  25. single_dict = {
  26. "multi_match": {
  27. "query": f"{key.get('token')}",
  28. "type": "phrase",
  29. "fields": [
  30. "title"
  31. ]
  32. }
  33. }
  34. should_list.append(single_dict)
  35. return should_list