Browse Source

优化多机redis过滤器配置格式

dongzhaorui 1 year ago
parent
commit
6081c65547

+ 15 - 13
FworkSpider/feapder/dedup/README.md

@@ -109,24 +109,26 @@ def test_filter():
 ```
 
 ```python
-# redis 多redis实例去重
+# 多个 redis 实例去重
 from feapder.dedup import Dedup
 
 def test_filter():
-    redis_conf = dict(
-        pylist_=dict(
-            redisdb_ip_port="192.168.3.71:8371",
-            redisdb_user_pass="top@123",
-            redisdb_db=0
+    redisdb_conf = [
+        dict(
+            fingerprint_pref="pylist_",
+            ip_port="192.168.3.71:8371",
+            user_pass="top@123",
+            db=0
         ),
-        list_=dict(
-            redisdb_ip_port="192.168.3.165:8165",
-            redisdb_user_pass="",
-            redisdb_db=0
+        dict(
+            fingerprint_pref="list_",
+            ip_port="192.168.3.165:8165",
+            user_pass="top@123",
+            db=0
         )
-    )
+    ]
     
-    dedup = Dedup(filter_type=6, to_md5=False, redis_conf=redis_conf, expire_time=60)
+    dedup = Dedup(filter_type=6, to_md5=False, redisdb_conf=redisdb_conf, expire_time=60)
     datas = ["xxx", "bbb"]
     dedup.add(datas)
     
@@ -135,4 +137,4 @@ def test_filter():
     dedup.filter_exist_data(datas)
     print(datas)
     assert datas == ["ccc"]
-```
+```

+ 5 - 5
FworkSpider/feapder/dedup/__init__.py

@@ -15,7 +15,7 @@ from feapder.utils.tools import get_md5
 from .bloomfilter import BloomFilter, ScalableBloomFilter
 from .expirefilter import ExpireFilter
 from .litefilter import LiteFilter
-from .redisfilter import RedisFilter, MRedisFilter
+from .redisfilter import RedisFilter, MultiRedisFilter
 
 
 class Dedup:
@@ -24,7 +24,7 @@ class Dedup:
     ExpireFilter = 3
     LiteFilter = 4
     RedisFilter = 5
-    MRedisFilter = 6
+    MultiRedisFilter = 6
 
     def __init__(self, filter_type: int = BloomFilter, to_md5: bool = True, **kwargs):
         if filter_type == Dedup.ExpireFilter:
@@ -51,9 +51,9 @@ class Dedup:
                 redis_url=kwargs.get("redis_url"),
                 expire_time=kwargs.get("expire_time")
             )
-        elif filter_type == Dedup.MRedisFilter:
-            self.dedup = MRedisFilter(
-                redis_conf=kwargs.get("redis_conf"),
+        elif filter_type == Dedup.MultiRedisFilter:
+            self.dedup = MultiRedisFilter(
+                redisdb_conf=kwargs.get("redisdb_conf"),
                 expire_time=kwargs.get("expire_time")
             )
         else:

+ 20 - 22
FworkSpider/feapder/dedup/redisfilter.py

@@ -10,7 +10,6 @@ Created on 2023-03-01
 
 from feapder.db.redisdb import RedisDB
 from feapder.dedup.basefilter import BaseFilter
-import feapder.utils.tools as tools
 
 
 class RedisFilter(BaseFilter):
@@ -79,35 +78,35 @@ class RedisFilter(BaseFilter):
         return is_exist if is_list else is_exist[0]
 
 
-class MRedisFilter(RedisFilter):
+class MultiRedisFilter(RedisFilter):
     redis_dbs = {}
 
-    def __init__(self, redis_conf=None, **kwargs):
-        super(MRedisFilter, self).__init__(**kwargs)
+    def __init__(self, redisdb_conf=None, **kwargs):
+        super(MultiRedisFilter, self).__init__(**kwargs)
 
-        self._prefix1 = 'list_'  # lua前缀
-        self._prefix2 = 'pylist_'  # python前缀
+        self._go_fingerprint_pref = "list_"  # lua 数据指纹前缀标识
+        self._py_fingerprint_pref = "pylist_"  # python 数据指纹前缀标识
 
-        if not redis_conf:
-            self.__class__.redis_dbs[self._prefix2] = RedisDB()
+        if not redisdb_conf:
+            self.__class__.redis_dbs[self._py_fingerprint_pref] = RedisDB()
         else:
-            if not isinstance(redis_conf, dict):
-                raise ValueError("redis_conf 必须是一个 dict")
-
-            for prefix, conf in redis_conf.items():
-                self.__class__.redis_dbs[prefix] = RedisDB(
-                    ip_ports=conf['redisdb_ip_port'],
-                    user_pass=conf['redisdb_user_pass'],
-                    db=conf['redisdb_db']
+            if not isinstance(redisdb_conf, list):
+                raise ValueError("redisdb_conf 必须是一个 list")
+
+            for conf in redisdb_conf:
+                self.__class__.redis_dbs[conf["fingerprint_pref"]] = RedisDB(
+                    ip_ports=conf["ip_port"],
+                    user_pass=conf["user_pass"],
+                    db=conf["db"]
                 )
 
     def __repr__(self):
-        return "<MRedisFilter: {}>".format(self.redis_dbs)
+        return "<MultiRedisFilter: {}>".format(self.redis_dbs)
 
     def exists(self, key):
         """lua增量检索/python增量检索"""
-        for prefix, redis_db in self.redis_dbs.items():
-            if redis_db.exists(prefix + key) > 0:
+        for fingerprint_pref, redis_db in self.redis_dbs.items():
+            if redis_db.exists(fingerprint_pref + key) > 0:
                 return True
         return False
 
@@ -119,13 +118,12 @@ class MRedisFilter(RedisFilter):
         """
         is_list = isinstance(keys, list)
         keys = keys if is_list else [keys]
-
-        redis_db = self.redis_dbs[self._prefix2]
+        redis_db = self.redis_dbs[self._py_fingerprint_pref]
 
         is_added = []
         for key in keys:
             if not self.exists(key):
-                key = self._prefix2 + key
+                key = self._py_fingerprint_pref + key
                 is_added.append(redis_db.set(key, 1, ex=self._ex))
             else:
                 is_added.append(False)