|
@@ -0,0 +1,101 @@
|
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
|
+"""
|
|
|
|
+Created on 2023-03-01
|
|
|
|
+---------
|
|
|
|
+@summary: redis单机过滤
|
|
|
|
+---------
|
|
|
|
+@author: dzr
|
|
|
|
+@email: dongzhaorui@topnet.net.cn
|
|
|
|
+"""
|
|
|
|
+import copy
|
|
|
|
+
|
|
|
|
+from feapder.db.redisdb import RedisDB
|
|
|
|
+from feapder.dedup.basefilter import BaseFilter
|
|
|
|
+from feapder.utils.tools import get_sha256
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class RedisFilter(BaseFilter):
    """
    Deduplication filter that stores fingerprints as plain Redis string keys.

    A fingerprint counts as "seen" when it exists under any of three key
    forms: the bare key, ``list_<key>`` or ``pylist_<key>``. This class only
    ever writes the ``pylist_`` form; the other two are merely looked up
    (presumably populated by other writers -- confirm against the deployment).

    The module summary describes this as a single-node Redis filter.
    """

    # Connection shared by every instance of the class. It is created by the
    # first __init__ call and reused afterwards.
    redis_db = None

    def __init__(self, redis_url=None, to_sha256: bool = True, expire_time=None):
        """
        @param redis_url: connection url handed to RedisDB
        @param to_sha256: hash every key with get_sha256 before touching redis
        @param expire_time: TTL in seconds for newly added keys; a falsy value
            (None or 0) falls back to two years
        """
        self._url = redis_url
        # NOTE(review): the connection lives on the class, so once it exists a
        # later instance constructed with a different redis_url still uses the
        # first connection. Confirm this sharing is intended.
        if not self.__class__.redis_db:
            self.__class__.redis_db = RedisDB(url=redis_url)

        self._ex = expire_time or 86400 * 365 * 2  # default: 2 years, in seconds
        self._prefix1 = 'list_'
        self._prefix2 = 'pylist_'

        self._to_sha256 = to_sha256

    def __repr__(self):
        return "<RedisDB: {}>".format(self.redis_db)

    def _deal_datas(self, datas):
        """
        Turn raw data into the keys that are stored in redis.

        @param datas: list / single value
        @return: sha256 digest(s) when hashing is enabled, otherwise a deep
            copy of the input so the caller's object is never shared
        """
        if not self._to_sha256:
            return copy.deepcopy(datas)

        if isinstance(datas, list):
            return [get_sha256(data) for data in datas]
        return get_sha256(datas)

    def _exists(self, key):
        """Return whatever ``redis_db.exists`` reports for one concrete key."""
        return self.redis_db.exists(key)

    def exists(self, key):
        """
        Check whether a prepared key is present under any of the key forms.

        The original note lists the forms as: full retrieval / lua incremental
        retrieval / python incremental retrieval (bare key, ``list_`` prefix,
        ``pylist_`` prefix, in that order).

        @param key: already processed key (see _deal_datas)
        @return: True if at least one form exists, otherwise False
        """
        # any() stops at the first hit, so the lookup order and the number of
        # round-trips match a chained ``or``.
        return any(
            self._exists(prefix + key) > 0
            for prefix in ("", self._prefix1, self._prefix2)
        )

    def add(self, keys, *args, **kwargs):
        """
        Add data to the filter.

        @param keys: value(s) to record as seen; a list enables batch mode
        @return: list / single value (False if the data already existed,
            True if it was newly added -- i.e. whether the add succeeded)
        """
        is_list = isinstance(keys, list)
        keys = keys if is_list else [keys]
        encrypt_keys = self._deal_datas(keys)

        is_added = []
        for key in encrypt_keys:
            if self.exists(key):
                is_added.append(False)
                continue

            # nx=True makes the write itself the final arbiter: if another
            # writer created the key between the check above and this call,
            # redis refuses the SET and the key is reported as not added.
            created = self.redis_db.set(
                self._prefix2 + key, 1, ex=self._ex, nx=True
            )
            is_added.append(bool(created))

        return is_added if is_list else is_added[0]

    def get(self, keys):
        """
        Check whether data already exists.

        A value that occurs more than once in the same batch is reported as
        existing from its second occurrence on, even if redis does not know it.

        @param keys: list / single value
        @return: list / single value (True if it exists, False otherwise)
        """
        is_list = isinstance(keys, list)
        keys = keys if is_list else [keys]
        encrypt_keys = self._deal_datas(keys)

        is_exist = []
        seen_in_batch = set()
        for key in encrypt_keys:
            if key in seen_in_batch:
                # Duplicate inside the batch: the answer is True regardless of
                # redis, so the lookup is skipped.
                is_exist.append(True)
            else:
                seen_in_batch.add(key)
                is_exist.append(self.exists(key))

        return is_exist if is_list else is_exist[0]
|