dongzhaorui 3 年 前
コミット
fa6240cb87
1 ファイル変更、5 行追加、7 行削除
  1. 5 7
      find_source/crawler/services/sync_data.py

+ 5 - 7
find_source/crawler/services/sync_data.py

@@ -143,16 +143,14 @@ class SyncData(BasicService):
         if self._init_collector:
             logger.info(f'[数据同步]开始加载 - 收录器')
             count = 0
+            q = {"param_common.11": {'$exists': True}}
             projection = {'param_common': 1}
-            cursor = MGO_LUA_SPIDERS.find(projection=projection)
+            cursor = MGO_LUA_SPIDERS.find(q, projection=projection)
             for item in cursor.sort(self.sort):
-                try:
-                    url = item['param_common'][11]
-                    if not is_url(url):
-                        continue
-                    domain = extract_domain(url)
-                except IndexError:
+                url = item['param_common'][11]
+                if not is_url(url):
                     continue
+                domain = extract_domain(url)
                 if not self.collector.data(domain):
                     self.collector.add_data(domain)
                     count += 1