dongzhaorui 3 年之前
父节点
当前提交
6db899cb7a
共有 1 个文件被更改,包括 4 次插入、4 次删除
  1. 4 4
      find_source/crawler/services/query.py

+ 4 - 4
find_source/crawler/services/query.py

@@ -5,7 +5,7 @@ from common.execptions import ExploreDataError
 from common.log import logger
 from crawler.services.basics import BasicService
 from crawler.utils import (
-    extract_base_url,
+    extract_host,
     extract_domain,
     is_url,
     err_details,
@@ -51,10 +51,10 @@ class DataQuery(BasicService):
                 lst = []
                 urls = self._engine.search(task['search'], cur_page)
                 for url in urls:
-                    base_url = extract_base_url(url)
-                    if not self.validator.data(base_url):
+                    host = extract_host(url)
+                    if not self.validator.data(host):
                         lst.append(self.make_task(
-                            url=base_url,
+                            url=host,
                             origin=task['origin'],
                             groups=task['groups'],
                             classify=self.visit_classify,