dongzhaorui 3 жил өмнө
parent
commit
1105ac49ad

+ 4 - 0
find_source/crawler/services/basics.py

@@ -140,6 +140,10 @@ class BasicService:
 
     def push_domain(self, task: Task):
         """数据挖掘结果,推送保存"""
+        if task['groups'] == self.url_groups:
+            duplicate = str(task['origin']).count(task['domain']) > 0
+            if duplicate:
+                return False
         if not self.collector.data(task['domain']):
             self._push_data('domain', task, MGO_DOMAIN)
             self.collector.add_data(task['domain'])