|
@@ -2,21 +2,22 @@
|
|
"""
|
|
"""
|
|
Created on 2023-03-01
|
|
Created on 2023-03-01
|
|
---------
|
|
---------
|
|
-@summary:
|
|
|
|
|
|
+@summary: redis集群过滤
|
|
---------
|
|
---------
|
|
@author: dzr
|
|
@author: dzr
|
|
@email: dongzhaorui@topnet.net.cn
|
|
@email: dongzhaorui@topnet.net.cn
|
|
"""
|
|
"""
|
|
-from Crypto.Hash import SHA256
|
|
|
|
|
|
+import copy
|
|
|
|
|
|
from feapder.db.redisdb import RedisDB
|
|
from feapder.db.redisdb import RedisDB
|
|
from feapder.dedup.basefilter import BaseFilter
|
|
from feapder.dedup.basefilter import BaseFilter
|
|
|
|
+from feapder.utils.tools import get_sha256
|
|
|
|
|
|
|
|
|
|
-class SwordFishFilter(BaseFilter):
|
|
|
|
|
|
+class RedisClusterFilter(BaseFilter):
|
|
redis_cluster = None
|
|
redis_cluster = None
|
|
|
|
|
|
- def __init__(self, redis_url, expire_time=None):
|
|
|
|
|
|
+ def __init__(self, redis_url, to_sha256: bool = True, expire_time=None):
|
|
if not redis_url:
|
|
if not redis_url:
|
|
raise ValueError("redis_url can't be None")
|
|
raise ValueError("redis_url can't be None")
|
|
|
|
|
|
@@ -29,28 +30,35 @@ class SwordFishFilter(BaseFilter):
|
|
)
|
|
)
|
|
|
|
|
|
self._ex = expire_time or 86400 * 365 * 2 # 2年 = 86400 * 365 * 2
|
|
self._ex = expire_time or 86400 * 365 * 2 # 2年 = 86400 * 365 * 2
|
|
- self._prefix = 'pylist_'
|
|
|
|
|
|
+ self._prefix1 = 'list_'
|
|
|
|
+ self._prefix2 = 'pylist_'
|
|
|
|
+
|
|
|
|
+ self._to_sha256 = to_sha256
|
|
|
|
|
|
def __repr__(self):
|
|
def __repr__(self):
|
|
return "<RedisCluster: {}>".format(self.startup_nodes)
|
|
return "<RedisCluster: {}>".format(self.startup_nodes)
|
|
|
|
|
|
- @staticmethod
|
|
|
|
- def sha256_encrypt(info):
|
|
|
|
- if info is None:
|
|
|
|
- return ''
|
|
|
|
- res = SHA256.new(info.encode('utf-8'))
|
|
|
|
- data = res.hexdigest()
|
|
|
|
- return data
|
|
|
|
|
|
+ def _deal_datas(self, datas):
|
|
|
|
+ if self._to_sha256:
|
|
|
|
+ if isinstance(datas, list):
|
|
|
|
+ keys = [get_sha256(data) for data in datas]
|
|
|
|
+ else:
|
|
|
|
+ keys = get_sha256(datas)
|
|
|
|
+ else:
|
|
|
|
+ keys = copy.deepcopy(datas)
|
|
|
|
|
|
- def encrypt_datas(self, datas):
|
|
|
|
- return [self.sha256_encrypt(data) for data in datas]
|
|
|
|
|
|
+ return keys
|
|
|
|
|
|
def _exists(self, key):
|
|
def _exists(self, key):
|
|
return self.redis_cluster.exists(key)
|
|
return self.redis_cluster.exists(key)
|
|
|
|
|
|
def exists(self, key):
|
|
def exists(self, key):
|
|
- """全量检索或者列表页检索"""
|
|
|
|
- if self._exists(key) > 0 or self._exists(self._prefix + key) > 0:
|
|
|
|
|
|
+ """全量检索/lua增量检索/python增量检索"""
|
|
|
|
+ if (
|
|
|
|
+ self._exists(key) > 0
|
|
|
|
+ or self._exists(self._prefix1 + key) > 0
|
|
|
|
+ or self._exists(self._prefix2 + key) > 0
|
|
|
|
+ ):
|
|
return True
|
|
return True
|
|
return False
|
|
return False
|
|
|
|
|
|
@@ -62,12 +70,14 @@ class SwordFishFilter(BaseFilter):
|
|
"""
|
|
"""
|
|
is_list = isinstance(keys, list)
|
|
is_list = isinstance(keys, list)
|
|
keys = keys if is_list else [keys]
|
|
keys = keys if is_list else [keys]
|
|
- encrypt_keys = self.encrypt_datas(keys)
|
|
|
|
|
|
+ encrypt_keys = self._deal_datas(keys)
|
|
|
|
|
|
is_added = []
|
|
is_added = []
|
|
for key in encrypt_keys:
|
|
for key in encrypt_keys:
|
|
if not self.exists(key):
|
|
if not self.exists(key):
|
|
- is_added.append(self.redis_cluster.set(self._prefix + key, 1, ex=self._ex))
|
|
|
|
|
|
+ is_added.append(
|
|
|
|
+ self.redis_cluster.set(self._prefix2 + key, 1, ex=self._ex)
|
|
|
|
+ )
|
|
else:
|
|
else:
|
|
is_added.append(False)
|
|
is_added.append(False)
|
|
|
|
|
|
@@ -81,7 +91,7 @@ class SwordFishFilter(BaseFilter):
|
|
"""
|
|
"""
|
|
is_list = isinstance(keys, list)
|
|
is_list = isinstance(keys, list)
|
|
keys = keys if is_list else [keys]
|
|
keys = keys if is_list else [keys]
|
|
- encrypt_keys = self.encrypt_datas(keys)
|
|
|
|
|
|
+ encrypt_keys = self._deal_datas(keys)
|
|
|
|
|
|
is_exist = []
|
|
is_exist = []
|
|
for key in encrypt_keys:
|
|
for key in encrypt_keys:
|