Bläddra i källkod

添加敏感词和垃圾文本词组过滤

dongzhaorui 3 år sedan
förälder
incheckning
8deeb693e5
1 ändrad fil med 8 tillägg och 6 borttagningar
  1. 8 6
      find_source/crawler/retrieve/verify.py

+ 8 - 6
find_source/crawler/retrieve/verify.py

@@ -22,8 +22,8 @@ def _sensitive_word(title: str):
     """敏感词"""
     for word in SENSITIVE_WORDS:
         if title.find(word) != -1:
-            return False
-    return True
+            return True
+    return False
 
 
 class Validator:
@@ -32,6 +32,8 @@ class Validator:
         self.rbf = RedisFilter(redis_key='duplicated_')
         self.rbf.start(1000000000, 0.00001)
         self.loop_Interval = 7200
+        self._sensitive_word = _sensitive_word
+        self._requirement_phrase = _requirement_phrase
 
     def _load_filter_feature(self):
         while True:
@@ -59,14 +61,14 @@ class Validator:
     def add_filter_feature(self, feature: str):
         self.rbf.add(feature)
 
-    def title(self, title, task):
-        if _sensitive_word(title):
+    def words(self, title, task):
+        if self._sensitive_word(title):
             task['sensitive'] = True
             return False
-        elif not _requirement_phrase(title):
+        elif not self._requirement_phrase(title):
             task['requirement'] = True
             return False
         return True
 
     def url(self, base_url):
-        return self.rbf.is_exists(base_url)
+        return not self.rbf.is_exists(base_url)