Browse Source

元博网 - 更新异常类

dongzhaorui 3 years ago
parent
commit
458a288e9b
1 changed file with 3 additions and 3 deletions
  1. 3 3
      ybw/detail_spider.py

+ 3 - 3
ybw/detail_spider.py

@@ -10,7 +10,7 @@ from crawler.clean_html import cleaner
 from crawler.crawl_scheduler import Scheduler
 from crawler.crawl_scheduler import Scheduler
 from crawler.login import login, load_login_cookies, login_check
 from crawler.login import login, load_login_cookies, login_check
 from utils.databases import mongo_table, int2long
 from utils.databases import mongo_table, int2long
-from utils.execptions import VoidCrawlError, JyBasicException
+from utils.execptions import CrawlError, YbwCrawlError
 from utils.log import logger
 from utils.log import logger
 from utils.socks5 import Proxy
 from utils.socks5 import Proxy
 
 
@@ -129,7 +129,7 @@ class DetailSpider:
                 element = fromstring(r.text)
                 element = fromstring(r.text)
                 nodes = element.xpath('//*[@id="main_dom"]/div[1]')
                 nodes = element.xpath('//*[@id="main_dom"]/div[1]')
                 if len(nodes) != 1:
                 if len(nodes) != 1:
-                    raise VoidCrawlError
+                    raise CrawlError
                 else:
                 else:
                     node = nodes[0]
                     node = nodes[0]
                     logger.info(f'[采集正文] id={node.attrib.get("id")}')
                     logger.info(f'[采集正文] id={node.attrib.get("id")}')
@@ -207,7 +207,7 @@ class DetailSpider:
                         {'$set': {'crawl_status': 'finished'}}
                         {'$set': {'crawl_status': 'finished'}}
                     )
                     )
                     sc.crawl_counter(1)
                     sc.crawl_counter(1)
-            except JyBasicException as e:
+            except YbwCrawlError as e:
                 if e.code == 10105:
                 if e.code == 10105:
                     '''检查出该异常时,程序会将es查询结果更新采集表'''
                     '''检查出该异常时,程序会将es查询结果更新采集表'''
                     self.crawl_tab.update_one(
                     self.crawl_tab.update_one(