dongzhaorui hace 3 años
padre
commit
487b1594a9
Se ha modificado 1 fichero con 8 adiciones y 11 borrados
  1. 8 11
      find_source/crawler/services/sync_data.py

+ 8 - 11
find_source/crawler/services/sync_data.py

@@ -1,7 +1,7 @@
 import threading
 
 from common.log import logger
-from crawler.services.basics import BasicSearch
+from crawler.services.basics import BasicService
 from crawler.utils import is_url, extract_domain
 from settings import (
     MGO_URLS,
@@ -12,7 +12,8 @@ from settings import (
 )
 
 
-class SyncData(BasicSearch):
+class SyncData(BasicService):
+    """数据同步服务"""
 
     def __init__(
             self,
@@ -29,10 +30,6 @@ class SyncData(BasicSearch):
         self._init_validator = init_validator
         self._init_collector = init_collector
         self._allow_load_data = False
-        self._init()
-
-    def _init(self):
-        threading.Thread(target=self.sync_data, name='SyncData').start()
 
     def sync_keywords(self):
         """同步搜索词数据"""
@@ -92,10 +89,10 @@ class SyncData(BasicSearch):
             if not is_url(item['name']):
                 items.remove(item)
                 continue
-            exists_url = self.validator.data(item['name'])
-            if exists_url:
-                items.remove(item)
-                continue
+            # exists_url = self.validator.data(item['name'])
+            # if exists_url:
+            #     items.remove(item)
+            #     continue
             lst.append(self.make_task(
                 url=item['name'],
                 origin=item['name'],
@@ -180,7 +177,7 @@ class SyncData(BasicSearch):
                     count += 1
             logger.info(f'[数据同步]过滤器读取{count}条去重特征')
 
-    def sync_data(self):
+    def start(self):
         """数据同步"""
 
         def _validate():