# -*- coding: utf-8 -*-
# data_spider/match_spider
"""
Created on 2023-10-10 
---------
@summary: Segment the title into words and assemble the ES query clauses
---------
@author: Lzz
"""
import json

import requests
from requests.auth import HTTPBasicAuth

import setting


def get_should(title):
    """Build phrase ``multi_match`` clauses from the segmented words of a title.

    The title is posted to ``setting.WORD_SEGMENTATION_API`` with the
    ``ik_smart`` analyzer (presumably an Elasticsearch ``_analyze`` endpoint;
    verify against the configured URL). Each returned token becomes one
    clause matching that token as a phrase against the ``title`` field.

    If the response body is not JSON, is not a JSON object, or carries no
    usable tokens, the whole ``title`` is used as the single query term.

    Args:
        title: The title text to segment.

    Returns:
        A list of ``{"multi_match": {...}}`` dicts, one per token, suitable
        for use as the ``should`` part of a bool query. Never empty.

    Raises:
        requests.RequestException: Propagated unchanged when the HTTP call
            itself fails (connection error, timeout, ...).
    """
    url = setting.WORD_SEGMENTATION_API
    auth = HTTPBasicAuth(setting.ES_USERNAME, setting.ES_PASSWORD)
    headers = {"Content-Type": "application/json"}
    data = {"analyzer": "ik_smart", "text": title}
    res = requests.post(url, headers=headers, auth=auth, json=data, timeout=10)

    try:
        tokens = json.loads(res.text).get("tokens")
    except (ValueError, AttributeError, TypeError):
        # ValueError: body is not valid JSON; AttributeError: JSON is not an
        # object; TypeError: body is not text. Anything else should surface.
        tokens = None

    words = []
    if isinstance(tokens, list):
        for item in tokens:
            word = item.get("token") if isinstance(item, dict) else None
            # Skip malformed entries instead of querying for the text "None".
            if word is not None and word != "":
                words.append(word)

    if not words:
        # Best-effort fallback: search for the unsegmented title.
        words = [title]

    return [
        {
            "multi_match": {
                "query": f"{word}",
                "type": "phrase",
                "fields": [
                    "title"
                ]
            }
        }
        for word in words
    ]