dongzhaorui 3 anni fa
parent
commit
fa6240cb87
1 file modificato con 5 aggiunte e 7 eliminazioni
  1. 5 7
      find_source/crawler/services/sync_data.py

+ 5 - 7
find_source/crawler/services/sync_data.py

@@ -143,16 +143,14 @@ class SyncData(BasicService):
         if self._init_collector:
             logger.info(f'[数据同步]开始加载 - 收录器')
             count = 0
+            q = {"param_common.11": {'$exists': True}}
             projection = {'param_common': 1}
-            cursor = MGO_LUA_SPIDERS.find(projection=projection)
+            cursor = MGO_LUA_SPIDERS.find(q, projection=projection)
             for item in cursor.sort(self.sort):
-                try:
-                    url = item['param_common'][11]
-                    if not is_url(url):
-                        continue
-                    domain = extract_domain(url)
-                except IndexError:
+                url = item['param_common'][11]
+                if not is_url(url):
                     continue
+                domain = extract_domain(url)
                 if not self.collector.data(domain):
                     self.collector.add_data(domain)
                     count += 1