dongzhaorui 3 anos atrás
pai
commit
48f1aa3afe
1 arquivo alterado com 5 adições e 3 exclusões
  1. 5 3
      find_source/crawler/spiders.py

+ 5 - 3
find_source/crawler/spiders.py

@@ -354,12 +354,12 @@ class SearchEngine(BasicSearch):
                 logger.info(f"<QccSearch> {task['groups']} >>> {word}")
                 try:
                     url = engine.by_org_get_site(task['search'])
-                    task['url'] = extract_base_url(url)
+                    task['url'] = url
                     task['name'] = word
-                    task['domain'] = extract_domain(task['url'])
+                    task['domain'] = extract_domain(url)
                     '''保存数据'''
                     self.push_data('save', task, MGO_SEARCH)
-                    if not is_url(task['url']):
+                    if not is_url(url):
                         continue
                     if self.validator.url(task['domain']):
                         continue
@@ -370,6 +370,8 @@ class SearchEngine(BasicSearch):
                     self.scheduler.add_excavate(task, level=task['weight'])
                 except HostsRetrieveError as e:
                     logger.exception(e)
+                    '''重新放回任务队列'''
+                    self.scheduler.add_query(task, level=task['weight'])
             else:
                 '''使用搜索引擎查询关键词'''
                 logger.info(f"<{ename}> {task['groups']} >>> {word}")