|
@@ -3,7 +3,8 @@ import time
|
|
|
from selenium.common.exceptions import (
|
|
|
WebDriverException,
|
|
|
TimeoutException,
|
|
|
- InvalidSessionIdException
|
|
|
+ InvalidSessionIdException,
|
|
|
+ NoSuchElementException
|
|
|
)
|
|
|
from selenium.webdriver import ActionChains
|
|
|
from selenium.webdriver.common.by import By
|
|
@@ -179,13 +180,6 @@ def goto(driver, web_element, wait_time=None, allow_check_page=False):
|
|
|
def next_page(driver, category):
|
|
|
"""翻页"""
|
|
|
_finished_pages = CRAWL_RECORDS[category]['pages']
|
|
|
- # web_elements = driver.find_elements(by=By.XPATH, value='//div[@id="Pagination"]/div[1]/child::*')
|
|
|
- # for element in web_elements[1:-1]:
|
|
|
- # val = element.text
|
|
|
- # if val not in _finished_pages:
|
|
|
- # goto(driver, element, wait_time=1.2)
|
|
|
- # return int(val)
|
|
|
- # else:
|
|
|
while True:
|
|
|
next_element = driver.find_element_by_xpath('//div[@id="Pagination"]/div[1]/child::a[last()]')
|
|
|
if next_element.text == '下一页':
|
|
@@ -332,7 +326,12 @@ def crawl_psp_frame(driver, handler, item):
|
|
|
check_timeout=15
|
|
|
)
|
|
|
'''切换到frame'''
|
|
|
- driver.switch_to.frame('mini-iframe-6')
|
|
|
+ try:
|
|
|
+ driver.switch_to.frame('mini-iframe-6')
|
|
|
+ except NoSuchElementException:
|
|
|
+ driver.quit()
|
|
|
+ logger.error(f'[未检测到iframe-{item["channel"]}]{item["title"]} - {item["competehref"]}')
|
|
|
+ raise NoSuchElementException()
|
|
|
'''等待加载数据'''
|
|
|
wait_load_detail(driver, check_feature='//div[contains(@role, "accordion")]')
|
|
|
content_html = extract_page_html(driver.page_source, feature='//div[@class="fui-accordions"]')
|