
Extract the database configuration and fold it into the global configuration

dongzhaorui 1 year ago
parent commit c95f3f51c4

+ 0 - 25
ybw/config/conf.yaml

@@ -1,25 +0,0 @@
-mongo:
-  host: 172.17.4.87
-  port: !!int 27080
-
-
-redis:
-  host: 172.17.162.28
-  port: !!int 7361
-  pwd: "k5ZJR5KV4q7DRZ92DQ"
-  db: !!int 1
-
-
-es:
-  host: 172.17.4.184
-  usename: "jybid"
-  pwd: "Top2023_JEB01i@31"
-  port: !!int 19905
-  db: biddingall # ES index alias
-
-
-proxy:
-  socks5:
-    url: http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch
-    auth:
-      Authorization: Basic amlhbnl1MDAxOjEyM3F3ZSFB

+ 0 - 5
ybw/config/constants.yaml

@@ -1,8 +1,3 @@
-headers:
-  User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36
-  Accept: '*/*'
-
-
 crawler_url:
   home_page: https://www.chinabidding.cn/search/searchgj/zbcg?areaid={}&keywords=&time_start={}&time_end={}&search_type=CONTEXT&categoryid=&rp={}&table_type={}&b_date=custom
   list_url: https://www.chinabidding.cn/search/searchgj/zbcg?areaid={}&keywords=&time_start={}&time_end={}&page={}&search_type=TITLE&categoryid=&rp={}&table_type={}&b_date=custom

+ 1 - 21
ybw/config/load.py

@@ -2,35 +2,15 @@ from pathlib import Path
 
 import yaml
 
-__all__ = [
-    'mongo_conf',
-    'redis_conf',
-    'es_conf',
-    'constants',
-    'headers',
-    'jy_proxy',
-    'crawler_url',
-    'region',
-    'node_module_path'
-]
+__all__ = ['constants', 'crawler_url', 'region', 'node_module_path']
 
 _base_path = Path(__file__).parent
-_yaml_conf = (_base_path / 'conf.yaml').resolve()
 _yaml_constants = (_base_path / 'constants.yaml').resolve()
 _yaml_areas = (_base_path / 'areas.yaml').resolve()
 _node_modules = (_base_path.parent / 'node_modules').resolve()
 
-with open(_yaml_conf, encoding="utf-8") as f:
-    conf = yaml.safe_load(f)
-    mongo_conf = conf['mongo']
-    redis_conf = conf['redis']
-    es_conf: dict = conf['es']
-    jy_proxy: dict = conf['proxy']
-
-
 with open(_yaml_constants, encoding="utf-8") as fp:
     constants = yaml.safe_load(fp)
-    headers: dict = constants['headers']
     crawler_url: dict = constants['crawler_url']
 
 with open(_yaml_areas, encoding="utf-8") as fr:
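With the database block gone, config.load only exposes the YAML-driven crawler constants. A minimal usage sketch of what remains (the format arguments are illustrative placeholders for the five template slots in constants.yaml: areaid, time_start, time_end, rp, table_type):

    from config.load import crawler_url, region, node_module_path

    # illustrative values; the real crawler supplies its own
    url = crawler_url['home_page'].format(18, '2024-01-01', '2024-01-31', 30, 1)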

+ 25 - 32
ybw/crawler/account.py

@@ -1,31 +1,26 @@
 import json
-from pathlib import Path
 
 import requests
+from bson.objectid import ObjectId
 
+import setting
+from utils.databases import mongo_table
 from utils.log import logger
 from utils.tools import wait
-from utils.databases import mongo_table
-from bson.objectid import ObjectId
-
-ROOT_PATH = Path(__file__).parent.parent
 
-_headers = {"Authorization": "Basic amlhbnl1MDAxOjEyM3F3ZSFB"}
-JSON_ACCOUNT_RECORD = (ROOT_PATH / 'config/account_record.json').resolve()
+HEADERS = {"Authorization": setting.JY_AUTH_TOKEN}
+ACCOUNT_DATA = (setting.ROOT_PATH / 'config/account_record.json').resolve()
 
 
 def account_record(uid, crawl_type):
-    with open(JSON_ACCOUNT_RECORD, 'w+', encoding='utf-8') as wp:
-        item = {
-            "uid": uid,
-            "crawl_type": crawl_type
-        }
+    with open(ACCOUNT_DATA, 'w+', encoding='utf-8') as wp:
+        item = {"uid": uid, "crawl_type": crawl_type}
         wp.write(json.dumps(item, indent=4))
 
 
 def read_account():
     try:
-        with open(JSON_ACCOUNT_RECORD, encoding='utf-8') as rp:
+        with open(ACCOUNT_DATA, encoding='utf-8') as rp:
             cookies: dict = json.load(rp)
             return cookies
     except (json.decoder.JSONDecodeError, FileNotFoundError):
@@ -33,16 +28,14 @@ def read_account():
 
 
 def get_account(site, crawl_type):
-    url = "http://cc.spdata.jianyu360.com/competing_goods/account/fetch"
-    params = {
-        "site": site,
-        "crawl_type": crawl_type
-    }
+    params = {"site": site, "crawl_type": crawl_type}
     try:
-        response = requests.get(url,
-                                headers=_headers,
-                                params=params,
-                                timeout=60)
+        response = requests.get(
+            setting.ACCOUNT_FETCH_API,
+            headers=HEADERS,
+            params=params,
+            timeout=60
+        )
         data = response.json()['data']
         logger.info("当前账号状态:{}".format(data['crawl_detail']))
     except requests.RequestException:
@@ -53,20 +46,20 @@ def get_account(site, crawl_type):
 
 
 def release_account(uid, crawl_type, disable_log=False):
-    url = "http://cc.spdata.jianyu360.com/competing_goods/account/release"
     if uid is not None:
-        params = {
-            "uid": uid,
-            "crawl_type": crawl_type
-        }
+        params = {"uid": uid, "crawl_type": crawl_type}
         while True:
             try:
-                response = requests.get(url,
-                                        headers=_headers,
-                                        params=params,
-                                        timeout=60)
+                response = requests.get(
+                    setting.ACCOUNT_RELEASE_API,
+                    headers=HEADERS,
+                    params=params,
+                    timeout=60
+                )
                 if response.status_code == 200:
-                    acc_status = mongo_table('py_spider', 'match_account').find_one({'_id': ObjectId(uid)})['crawl_detail']
+                    collection = mongo_table('py_spider', 'match_account')
+                    ret = collection.find_one({'_id': ObjectId(uid)})
+                    acc_status = ret['crawl_detail']
                     if not disable_log:
                         logger.info(f"release_account >>> {response.json()}, status : {acc_status}")
                     break
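Taken together, a hedged sketch of the account lifecycle these helpers implement. It assumes get_account returns the data dict it logs, and the "uid" key name is an assumption based on release_account's parameter:

    from crawler.account import account_record, get_account, release_account

    account = get_account("元博网", crawl_type="list")  # site/crawl_type values are illustrative
    if account is not None:
        account_record(account["uid"], crawl_type="list")  # "uid" key is assumed
        try:
            pass  # crawl with this account
        finally:
            release_account(account["uid"], crawl_type="list")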

+ 5 - 7
ybw/crawler/login.py

@@ -4,22 +4,20 @@ import threading
 import time
 import uuid
 from collections import namedtuple
-from pathlib import Path
 
 import execjs
 import requests
 from requests import Session
 from requests.utils import dict_from_cookiejar
 
+import setting
 from config.load import node_module_path
 from utils.execptions import CrawlError
 from utils.log import logger
 
-LOCK = threading.RLock()
-
-ROOT_PATH = Path(__file__).parent.parent
-JSON_LOGIN_COOKIE = (ROOT_PATH / 'config/login_cookie.json').resolve()
+RLOCK = threading.RLock()
 
+JSON_LOGIN_COOKIE = (setting.ROOT_PATH / 'config/login_cookie.json').resolve()
 User = namedtuple('User', ['phone', 'passwd'])
 
 
@@ -42,7 +40,7 @@ def load_login_cookies(user_name: str):
 
 
 def save_login_cookies(user_name: str, login_cookie: dict):
-    with LOCK:
+    with RLOCK:
         # read the file if it exists, create it otherwise
         fp = _open_file()
         # load existing content into memory, else start from an empty dict
@@ -73,7 +71,7 @@ def update_login_cookies(user_name: str, update_val: dict):
         update_val: the cookie content to update
 
     """
-    with LOCK:
+    with RLOCK:
         fp = open(JSON_LOGIN_COOKIE, encoding='utf-8')
         user_maps: dict = json.load(fp)
         login_cookies: dict = user_maps.get(user_name)
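The rename from LOCK to RLOCK is cosmetic, but the type matters: threading.RLock is re-entrant, so a thread that already holds the lock can acquire it again without deadlocking, which a plain threading.Lock would not allow. A minimal illustration:

    import threading

    RLOCK = threading.RLock()

    def save():
        with RLOCK:
            update()  # safe: the same thread re-acquires the RLock

    def update():
        with RLOCK:  # a non-reentrant Lock would deadlock here
            pass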

+ 32 - 0
ybw/setting.py

@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-02-28 
+---------
+@summary: Global configuration
+---------
+@author: Dzr
+"""
+import pathlib
+
+ROOT_PATH = pathlib.Path(__file__).absolute().parent
+
+MONGO_IP = "172.17.4.87"
+MONGO_PORT = 27080
+MONGO_DB = "py_spider"
+
+REDIS_URL = "redis://:k5ZJR5KV4q7DRZ92DQ@172.17.4.240:8361/0"
+# REDIS_URL = "redis://default:top@123@192.168.3.165:8165/5"
+
+ES_IP = "172.17.4.184"
+ES_PORT = 19905
+ES_USERNAME = "jybid"
+ES_PASSWORD = "Top2023_JEB01i@31"
+ES_INDEX = "biddingall"  # ES index alias
+WORD_SEGMENTATION_API = "http://172.17.4.184:19905/_analyze"  # word-segmentation (analyzer) endpoint
+# WORD_SEGMENTATION_API = "http://192.168.3.149:9201/_analyze"
+# WORD_SEGMENTATION_API = "http://192.168.3.182:1990/_analyze"
+
+JY_AUTH_TOKEN = "Basic amlhbnl1MDAxOjEyM3F3ZSFB"
+PROXY_API = "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch"
+ACCOUNT_FETCH_API = "http://cc.spdata.jianyu360.com/competing_goods/account/fetch"
+ACCOUNT_RELEASE_API = "http://cc.spdata.jianyu360.com/competing_goods/account/release"
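With setting.py at the package root, every module reads its connection details from one place instead of parsing conf.yaml. A sketch of typical consumption, mirroring the imports this commit adds elsewhere:

    import setting

    mongo_uri = f"mongodb://{setting.MONGO_IP}:{setting.MONGO_PORT}"
    cookie_file = setting.ROOT_PATH / 'config' / 'login_cookie.json'
    headers = {"Authorization": setting.JY_AUTH_TOKEN}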

+ 28 - 45
ybw/utils/RedisDB.py

@@ -10,14 +10,15 @@ import hashlib
 
 import redis
 
+import setting
 
-class RedisFilter:
-    redis_db = None
-
-    def __init__(self, redis_url=None, expire_time=None):
 
-        self.__class__.redis_db = redis.StrictRedis.from_url(redis_url)  # standalone instance
+class RedisFilter:
 
+    def __init__(self, url=None, expire_time=None):
+        if not url:
+            url = setting.REDIS_URL
+        self.redis_db = redis.StrictRedis.from_url(url)
         self._ex = expire_time or 86400 * 365 * 1  # one year = 86400 * 365 * 1
 
     def __repr__(self):
@@ -29,19 +30,21 @@ class RedisFilter:
             return True
         return False
 
-    def add(self, keys, *args, **kwargs):
+    def add(self, keys):
         """
-        Add data.  To delete: redis_db.delete("pylist_" + key)
+        Add data
+
         @param keys: keyword(s) to check against redis; lists are handled in bulk
-        @return: list / single value (False if the data already exists, True otherwise, i.e. whether the add succeeded)
+        @return: list / single value (False if the add failed, True if it succeeded)
         """
         is_list = isinstance(keys, list)
         keys = keys if is_list else [keys]
 
         is_added = []
         for key in keys:
-            if not self.exists(key):
-                is_added.append(self.redis_db.set(key, 1, ex=self._ex))
+            pkey = "pylist_" + self.fingerprint(key)
+            if not self.exists(pkey):
+                is_added.append(self.redis_db.set(pkey, 1, ex=self._ex))
             else:
                 is_added.append(False)
 
@@ -58,7 +61,8 @@ class RedisFilter:
 
         is_exist = []
         for key in keys:
-            is_exist.append(self.exists(key))
+            pkey = "pylist_" + self.fingerprint(key)
+            is_exist.append(self.exists(pkey))
 
         # check whether the input data itself contains duplicates
         temp_set = set()
@@ -70,37 +74,16 @@ class RedisFilter:
 
         return is_exist if is_list else is_exist[0]
 
-
-def get_sha256(*args):
-    """
-    @summary: get a unique 64-character value, used as a unique id
-    ---------
-    @param *args: the values combined for deduplication
-    ---------
-    @result: 5580c91ea29bf5bd963f4c08dfcacd983566e44ecea1735102bc380576fd6f30
-    """
-
-    sha256 = hashlib.sha256()
-    for arg in args:
-        sha256.update(str(arg).encode())
-    return sha256.hexdigest()  # 64 characters
-
-
-def rexists(dedup, data):
-    data = [data] if not isinstance(data, list) else data
-    args = sorted(data)
-    pykey = "pylist_" + get_sha256(*args)
-    if dedup.get(pykey):
-        ''' exists '''
-        return True
-    else:
-        ''' does not exist '''
-        return False
-
-
-def radd(dedup, data):
-    data = [data] if not isinstance(data, list) else data
-    args = sorted(data)
-    pykey = "pylist_" + get_sha256(*args)
-    state = dedup.add(pykey)
-    return state
+    def fingerprint(self, *args):
+        """
+        @summary: get a unique 64-character value, the data fingerprint
+        ---------
+        @param args: the set of values to deduplicate on
+        ---------
+        @result: 5580c91ea29bf5bd963f4c08dfcacd983566e44ecea1735102bc380576fd6f30
+        """
+        args = sorted(args)
+        sha256 = hashlib.sha256()
+        for arg in args:
+            sha256.update(str(arg).encode())
+        return sha256.hexdigest()
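A hedged usage sketch of the reworked filter. The connection falls back to setting.REDIS_URL when no URL is passed; the lookup method name get is inferred from the removed rexists helper, which called dedup.get:

    from utils.RedisDB import RedisFilter

    dedup = RedisFilter(expire_time=86400 * 30)  # keys expire after 30 days

    key = "https://www.chinabidding.cn/detail/12345.html"  # illustrative dedup key
    if not dedup.get(key):  # not seen before
        dedup.add(key)      # stored as "pylist_" + sha256 fingerprint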

+ 19 - 24
ybw/utils/databases.py

@@ -1,10 +1,9 @@
 import bson
 import pymongo
-import redis
 from elasticsearch import Elasticsearch
-from utils.title_participle import get_should
 
-from config.load import mongo_conf, redis_conf, es_conf
+import setting
+from utils.title_participle import get_should
 
 # ---------------------------------- mongo ----------------------------------
 MONGO_URI_CLIENTS = {}    # holds all clients, keyed by connection URI
@@ -14,11 +13,12 @@ def mongo_client(cfg=None, host=None, port=None, fork=False, **kwargs):
     if host is not None and port is not None:
         uri = f'mongodb://{host}:{port}'
     else:
-        _cfg = (cfg or mongo_conf)
+        _cfg = (cfg or {'host': setting.MONGO_IP, 'port': setting.MONGO_PORT})
         uri = f'mongodb://{_cfg["host"]}:{_cfg["port"]}'
 
     if fork:
         return pymongo.MongoClient(uri, **kwargs)
+
     global MONGO_URI_CLIENTS
     matched_client = MONGO_URI_CLIENTS.get(uri)
     if matched_client is None:
@@ -29,13 +29,13 @@ def mongo_client(cfg=None, host=None, port=None, fork=False, **kwargs):
     return matched_client
 
 
-def mongo_database(name: str, **kw):
-    client = mongo_client(**kw)
+def mongo_database(name: str, **kwargs):
+    client = mongo_client(**kwargs)
     return client.get_database(name)
 
 
-def mongo_table(db: str, name: str, **kw):
-    database = mongo_database(db, **kw)
+def mongo_table(db: str, name: str, **kwargs):
+    database = mongo_database(db, **kwargs)
     return database.get_collection(name)
 
 
@@ -51,8 +51,16 @@ def object_id(_id: str):
 # ---------------------------------- es ----------------------------------
 def es_client(cfg=None):
     if cfg is None:
-        cfg = es_conf
-    return Elasticsearch([{"host": cfg['host'], "port": cfg['port']}],http_auth=(cfg['usename'], cfg['pwd']))
+        cfg = {
+            'host': setting.ES_IP,
+            'username': setting.ES_USERNAME,
+            'pwd': setting.ES_PASSWORD,
+            'port': setting.ES_PORT,
+        }
+
+    hosts = [{'host': cfg['host'], 'port': cfg['port']}]
+    auth = (cfg['username'], cfg['pwd'])
+    return Elasticsearch(hosts, http_auth=auth)
 
 
 def es_query(title: str, publish_time: int):
@@ -79,19 +87,6 @@ def es_query(title: str, publish_time: int):
             }
         }
     }
-    result = client.search(index=es_conf['db'], body=query, request_timeout=100)
+    result = client.search(index=setting.ES_INDEX, body=query, request_timeout=100)
     total = int(result['hits']['total']['value'])
     return total
-
-
-# ---------------------------------- redis ----------------------------------
-def redis_client(cfg=None):
-    if cfg is None:
-        cfg = redis_conf
-    pool = redis.ConnectionPool(
-        host=cfg['host'],
-        port=cfg['port'],
-        password=cfg['pwd'],
-        db=cfg['db']
-    )
-    return redis.Redis(connection_pool=pool, decode_responses=True)
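The redis_client helper is gone; utils.RedisDB.RedisFilter now builds its own connection from setting.REDIS_URL. A brief sketch of the surviving call paths, with illustrative arguments:

    import setting
    from utils.databases import mongo_table, es_query

    coll = mongo_table(setting.MONGO_DB, 'match_account')  # clients are cached per URI
    total = es_query('市政工程招标公告', publish_time=1709078400)  # count of similar titles in ES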

+ 17 - 17
ybw/utils/socks5.py

@@ -8,29 +8,29 @@ Created on 2024-02-27
 """
 import requests
 
-from config.load import jy_proxy
+import setting
+from utils.log import logger
 
 
 def get_proxy(scheme=None, default=None, socks5h=False):
-    headers = jy_proxy['socks5']['auth']
-    url = jy_proxy['socks5']['url']
+    headers = {'Authorization': setting.JY_AUTH_TOKEN}
     try:
-        proxy_res = requests.get(url, headers=headers).json()
-        proxies = proxy_res.get('data')
-        if proxy_res and proxies:
+        resp = requests.get(setting.PROXY_API, headers=headers, timeout=5).json()
+        proxies = resp.get("data")
+        if resp and proxies:
             if socks5h:
-                proxyh = {}
                 proxy_items = proxies.get("http")
-                proxyh["http"] = proxy_items.replace("socks5", "socks5h")
-                proxyh["https"] = proxy_items.replace("socks5", "socks5h")
-                proxies = proxyh
-            # print(f"切换代理:{proxies}")
-            if not scheme:
-                return proxies
-            else:
-                return proxies.get(scheme, default)
+                # socks5h: let the proxy VPS perform DNS resolution
+                proxies = dict(
+                    http=proxy_items.replace("socks5", "socks5h"),
+                    https=proxy_items.replace("socks5", "socks5h")
+                )
+
+            logger.debug(f"切换代理:{proxies}")
+            return proxies if not scheme else proxies.get(scheme, default)
         else:
-            print("暂无代理...")
+            logger.info("暂无代理...")
     except Exception:
         pass
-    return None
+
+    return default
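Call sites keep working unchanged; a sketch of the two modes (note the function now returns default rather than a bare None on failure):

    import requests
    from utils.socks5 import get_proxy

    proxies = get_proxy(socks5h=True)  # e.g. {'http': 'socks5h://host:port', 'https': ...}
    if proxies:
        requests.get('https://www.chinabidding.cn', proxies=proxies, timeout=30)

    socks5_http = get_proxy(scheme='http', default=None)  # a single scheme, or None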

+ 9 - 16
ybw/utils/title_participle.py

@@ -6,31 +6,25 @@ Created on 2023-10-10
 ---------
 @author: Lzz
 """
-from requests.auth import HTTPBasicAuth
-import requests
 import json
 
+import requests
+from requests.auth import HTTPBasicAuth
 
-def get_should(title):
+import setting
 
-    # url = "http://192.168.3.149:9201/_analyze"  # 测试
-    url = "http://172.17.4.184:19905/_analyze"  # 线上
-    username = "jybid"
-    password = "Top2023_JEB01i@31"
 
+def get_should(title):
+    url = setting.WORD_SEGMENTATION_API
+    auth = HTTPBasicAuth(setting.ES_USERNAME, setting.ES_PASSWORD)
     headers = {"Content-Type": "application/json"}
-    auth = HTTPBasicAuth(username, password)
-    data = {
-        "analyzer": "ik_smart",
-        "text": title
-    }
-
+    data = {"analyzer": "ik_smart", "text": title}
     res = requests.post(url, headers=headers, auth=auth, json=data, timeout=10)
 
     try:
-        res_text = json.loads(res.text).get('tokens') or [{"token":title}]
+        res_text = json.loads(res.text).get('tokens') or [{"token": title}]
     except:
-        res_text = [{"token":title}]
+        res_text = [{"token": title}]
 
     should_list = []
     for key in res_text:
@@ -46,4 +40,3 @@ def get_should(title):
         should_list.append(single_dict)
 
     return should_list
-
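get_should tokenizes the title with the ik_smart analyzer and returns one clause per token, falling back to the whole title when the analyzer call fails; databases.es_query embeds the result in its bool query. An illustrative call:

    from utils.title_participle import get_should

    should_list = get_should('市政工程招标公告')
    # one entry per ik_smart token; the exact clause shape is built in the loop above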