|
@@ -10,7 +10,7 @@ from crawler.clean_html import cleaner
|
|
|
from crawler.crawl_scheduler import Scheduler
|
|
|
from crawler.login import login, load_login_cookies, login_check
|
|
|
from utils.databases import mongo_table, int2long
|
|
|
-from utils.execptions import VoidCrawlError, JyBasicException
|
|
|
+from utils.execptions import CrawlError, YbwCrawlError
|
|
|
from utils.log import logger
|
|
|
from utils.socks5 import Proxy
|
|
|
|
|
@@ -129,7 +129,7 @@ class DetailSpider:
|
|
|
element = fromstring(r.text)
|
|
|
nodes = element.xpath('//*[@id="main_dom"]/div[1]')
|
|
|
if len(nodes) != 1:
|
|
|
- raise VoidCrawlError
|
|
|
+ raise CrawlError
|
|
|
else:
|
|
|
node = nodes[0]
|
|
|
logger.info(f'[采集正文] id={node.attrib.get("id")}')
|
|
@@ -207,7 +207,7 @@ class DetailSpider:
|
|
|
{'$set': {'crawl_status': 'finished'}}
|
|
|
)
|
|
|
sc.crawl_counter(1)
|
|
|
- except JyBasicException as e:
|
|
|
+ except YbwCrawlError as e:
|
|
|
if e.code == 10105:
|
|
|
'''检查出该异常时,程序会将es查询结果更新采集表'''
|
|
|
self.crawl_tab.update_one(
|