validate.py 788 B

12345678910111213141516171819202122232425262728293031
  1. from crawler.bloom_filter.RedisBloomFilter import RedisFilter
  2. from settings import FILTER_WORDS
  3. class Validator:
  4. def __init__(self, redis_key='Validator_'):
  5. self._validator_name = redis_key
  6. self._rbf = RedisFilter(redis_key=self._validator_name)
  7. self._rbf.start(1000000000, 0.00001)
  8. @staticmethod
  9. def _filter_words(val: str):
  10. """过滤词"""
  11. for word in FILTER_WORDS:
  12. if val.find(word) != -1:
  13. return True
  14. return False
  15. def add_data(self, val: str):
  16. return self._rbf.add(val)
  17. def data(self, val: str):
  18. return self._rbf.is_exists(val)
  19. def words(self, val: str):
  20. return self._filter_words(val)
  21. @property
  22. def length(self):
  23. return len(self._rbf)