|
@@ -2,55 +2,51 @@
|
|
|
"""
|
|
|
Created on 2023-03-01
|
|
|
---------
|
|
|
-@summary: redis单机过滤
|
|
|
+@summary: redis集群/单机过滤器
|
|
|
---------
|
|
|
@author: dzr
|
|
|
@email: dongzhaorui@topnet.net.cn
|
|
|
"""
|
|
|
-import copy
|
|
|
|
|
|
from feapder.db.redisdb import RedisDB
|
|
|
from feapder.dedup.basefilter import BaseFilter
|
|
|
-from feapder.utils.tools import get_sha256
|
|
|
+import feapder.utils.tools as tools
|
|
|
|
|
|
|
|
|
class RedisFilter(BaseFilter):
|
|
|
redis_db = None
|
|
|
|
|
|
- def __init__(self, redis_url=None, to_sha256: bool = True, expire_time=None):
|
|
|
- self._url = redis_url
|
|
|
+ def __init__(self, redis_url=None, expire_time=None):
|
|
|
if not self.__class__.redis_db:
|
|
|
- self.__class__.redis_db = RedisDB(url=redis_url)
|
|
|
+ if isinstance(redis_url, list) and len(redis_url) > 1:
|
|
|
+ self.__class__.redis_db = RedisDB(
|
|
|
+ ip_ports=redis_url,
|
|
|
+ decode_responses=True,
|
|
|
+ user_pass='',
|
|
|
+ ) # 集群
|
|
|
+ else:
|
|
|
+ self.__class__.redis_db = RedisDB(url=redis_url) # 单机
|
|
|
|
|
|
self._ex = expire_time or 86400 * 365 * 2 # 2年 = 86400 * 365 * 2
|
|
|
self._prefix1 = 'list_'
|
|
|
self._prefix2 = 'pylist_'
|
|
|
|
|
|
- self._to_sha256 = to_sha256
|
|
|
-
|
|
|
def __repr__(self):
|
|
|
- return "<RedisDB: {}>".format(self.redis_db)
|
|
|
-
|
|
|
- def _deal_datas(self, datas):
|
|
|
- if self._to_sha256:
|
|
|
- if isinstance(datas, list):
|
|
|
- keys = [get_sha256(data) for data in datas]
|
|
|
- else:
|
|
|
- keys = get_sha256(datas)
|
|
|
- else:
|
|
|
- keys = copy.deepcopy(datas)
|
|
|
-
|
|
|
- return keys
|
|
|
-
|
|
|
- def _exists(self, key):
|
|
|
- return self.redis_db.exists(key)
|
|
|
+ return "<RedisFilter: {}>".format(self.redis_db)
|
|
|
|
|
|
def exists(self, key):
|
|
|
"""全量检索/lua增量检索/python增量检索"""
|
|
|
+ if '&&' in key:
|
|
|
+ md5, sha256 = key.split("&&")
|
|
|
+ mixture = tools.get_sha256(md5)
|
|
|
+ else:
|
|
|
+ mixture = sha256 = key
|
|
|
+
|
|
|
if (
|
|
|
- self._exists(key) > 0
|
|
|
- or self._exists(self._prefix1 + key) > 0
|
|
|
- or self._exists(self._prefix2 + key) > 0
|
|
|
+ self.redis_db.exists(sha256) > 0
|
|
|
+ or self.redis_db.exists(self._prefix1 + sha256) > 0
|
|
|
+ or self.redis_db.exists(self._prefix2 + sha256) > 0
|
|
|
+ or self.redis_db.exists(self._prefix2 + mixture) > 0
|
|
|
):
|
|
|
return True
|
|
|
return False
|
|
@@ -63,13 +59,17 @@ class RedisFilter(BaseFilter):
|
|
|
"""
|
|
|
is_list = isinstance(keys, list)
|
|
|
keys = keys if is_list else [keys]
|
|
|
- encrypt_keys = self._deal_datas(keys)
|
|
|
|
|
|
is_added = []
|
|
|
- for key in encrypt_keys:
|
|
|
+ for key in keys:
|
|
|
if not self.exists(key):
|
|
|
+ if '&&' in key:
|
|
|
+ md5, sha256 = key.split("&&")
|
|
|
+ else:
|
|
|
+ sha256 = key
|
|
|
+
|
|
|
is_added.append(
|
|
|
- self.redis_db.set(self._prefix2 + key, 1, ex=self._ex)
|
|
|
+ self.redis_db.set(self._prefix2 + sha256, 1, ex=self._ex)
|
|
|
)
|
|
|
else:
|
|
|
is_added.append(False)
|
|
@@ -84,15 +84,14 @@ class RedisFilter(BaseFilter):
|
|
|
"""
|
|
|
is_list = isinstance(keys, list)
|
|
|
keys = keys if is_list else [keys]
|
|
|
- encrypt_keys = self._deal_datas(keys)
|
|
|
|
|
|
is_exist = []
|
|
|
- for key in encrypt_keys:
|
|
|
+ for key in keys:
|
|
|
is_exist.append(self.exists(key))
|
|
|
|
|
|
# 判断数据本身是否重复
|
|
|
temp_set = set()
|
|
|
- for i, key in enumerate(encrypt_keys):
|
|
|
+ for i, key in enumerate(keys):
|
|
|
if key in temp_set:
|
|
|
is_exist[i] = True
|
|
|
else:
|