Эх сурвалжийг харах

更换MongoDB垃圾表名称

dongzhaorui 3 жил өмнө
parent
commit
e9af1fd92f

+ 2 - 2
find_source/crawler/services/basics.py

@@ -23,7 +23,7 @@ from settings import (
     MGO_ORGS,
     MGO_KEYWORDS,
     MGO_COMPETING_GOODS,
-    MGO_REMOVAL_DUPLICATE,
+    MGO_DATA_GARBAGE,
     MGO_DOMAIN,
     MGO_QUERY,
     MGO_RECORDS
@@ -133,7 +133,7 @@ class BasicService:
     def push_remove(self, task: Task):
         """数据去重表"""
         if not self.validator.data(task['url']):
-            self._push_data('remove', task, MGO_REMOVAL_DUPLICATE)
+            self._push_data('remove', task, MGO_DATA_GARBAGE)
             self.validator.add_data(task['url'])
             return True
         return False

+ 3 - 3
find_source/crawler/services/sync_data.py

@@ -7,7 +7,7 @@ from settings import (
     MGO_URLS,
     MGO_ORGS,
     MGO_COMPETING_GOODS,
-    MGO_REMOVAL_DUPLICATE,
+    MGO_DATA_GARBAGE,
     MGO_LUA_SPIDERS
 )
 
@@ -161,12 +161,12 @@ class SyncData(BasicService):
         if self._init_validator:
             logger.info(f'[数据同步]开始加载 - 过滤器')
             count = 0
-            cursor = MGO_REMOVAL_DUPLICATE.find(projection={'domain': 1})
+            cursor = MGO_DATA_GARBAGE.find(projection={'domain': 1})
             for item in cursor.sort(self.sort):
                 try:
                     domain = item['domain']
                     if not isinstance(domain, str):
-                        MGO_REMOVAL_DUPLICATE.delete_one({'_id': item['_id']})
+                        MGO_DATA_GARBAGE.delete_one({'_id': item['_id']})
                         continue
                 except IndexError:
                     continue

+ 4 - 6
find_source/settings.py

@@ -3,10 +3,10 @@ from common.databases import mongo_table, redis_client
 
 '''Mongo'''
 MGO_DATABASE = 'shujuziyuan'
-'''去重表'''
-MGO_REMOVAL_DUPLICATE = mongo_table(db=MGO_DATABASE, name='removal_duplicate')
-'''数据挖掘结果表'''
-MGO_DOMAIN = mongo_table(db=MGO_DATABASE, name='new_domains')
+'''垃圾表'''
+MGO_DATA_GARBAGE = mongo_table(db=MGO_DATABASE, name='data_garbage')
+'''寻源结果表'''
+MGO_DOMAIN = mongo_table(db=MGO_DATABASE, name='data_domains')
 '''查询结果表'''
 MGO_QUERY = mongo_table(db=MGO_DATABASE, name='data_query')
 ''''数据采集记录表'''
@@ -19,8 +19,6 @@ MGO_KEYWORDS = mongo_table(db=MGO_DATABASE, name='retrieve_keywords')
 MGO_URLS = mongo_table(db=MGO_DATABASE, name='retrieve_urls')
 '''竞品列表'''
 MGO_COMPETING_GOODS = mongo_table(db=MGO_DATABASE, name='retrieve_competing_goods')
-'''网站栏目表'''
-MGO_WEBSITE = mongo_table(db=MGO_DATABASE, name='website')
 '''luaconfig'''
 MGO_LUA_SPIDERS = mongo_table(db='editor', name='luaconfig')