|
@@ -5,7 +5,7 @@ from common.execptions import ExploreDataError
|
|
|
from common.log import logger
|
|
|
from crawler.services.basics import BasicService
|
|
|
from crawler.utils import (
|
|
|
- extract_base_url,
|
|
|
+ extract_host,
|
|
|
extract_domain,
|
|
|
is_url,
|
|
|
err_details,
|
|
@@ -51,10 +51,10 @@ class DataQuery(BasicService):
|
|
|
lst = []
|
|
|
urls = self._engine.search(task['search'], cur_page)
|
|
|
for url in urls:
|
|
|
- base_url = extract_base_url(url)
|
|
|
- if not self.validator.data(base_url):
|
|
|
+ host = extract_host(url)
|
|
|
+ if not self.validator.data(host):
|
|
|
lst.append(self.make_task(
|
|
|
- url=base_url,
|
|
|
+ url=host,
|
|
|
origin=task['origin'],
|
|
|
groups=task['groups'],
|
|
|
classify=self.visit_classify,
|