dongzhaorui 3 жил өмнө
parent
commit
a64b6ea401

+ 14 - 13
find_source/crawler/retrieve/verify.py

@@ -10,18 +10,18 @@ from settings import (
 )
 
 
-def _requirement_phrase(title: str):
+def _requirement_phrase(val: str):
     """关键词"""
     for word in REQUIREMENT_PHRASE:
-        if title.find(word) != -1:
+        if val.find(word) != -1:
             return True
     return False
 
 
-def _sensitive_word(title: str):
-    """敏感词"""
+def _sensitive_word(val: str):
+    """垃圾词"""
     for word in SENSITIVE_WORDS:
-        if title.find(word) != -1:
+        if val.find(word) != -1:
             return True
     return False
 
@@ -62,14 +62,15 @@ class Validator:
     def add_url(self, url: str):
         self._rbf.add(url)
 
-    def words(self, title, task):
-        if self._sensitive_word(title):
-            task['sensitive'] = True
-            return False
-        elif not self._requirement_phrase(title):
-            task['requirement'] = True
-            return False
-        return True
+    def sensitive_word(self, val):
+        if val is None or len(val) < 5:
+            return True
+        if self._sensitive_word(val):
+            return True
+        return False
+
+    def requirement_word(self, val):
+        return self._requirement_phrase(val)
 
     def url(self, url: str):
         return self._rbf.is_exists(url)