|
@@ -2,7 +2,8 @@ import time
|
|
|
|
|
|
from selenium.common.exceptions import (
|
|
|
WebDriverException,
|
|
|
- TimeoutException
|
|
|
+ TimeoutException,
|
|
|
+ NoSuchElementException
|
|
|
)
|
|
|
|
|
|
from common.databases import mongo_table, int2long, redis_client
|
|
@@ -118,7 +119,6 @@ def crawl_spider(crawl_max_page=1, enable_proxy=False, **kw):
|
|
|
"spidercode": crawl_menu.spidercode,
|
|
|
"T": "bidding",
|
|
|
"sendflag": "false",
|
|
|
- "iscompete": "true",
|
|
|
"_d": "comeintime",
|
|
|
"comeintime": '',
|
|
|
"area": '',
|
|
@@ -138,8 +138,6 @@ def crawl_spider(crawl_max_page=1, enable_proxy=False, **kw):
|
|
|
print(f'>>> {sign}')
|
|
|
if r.hexists(redis_key, sign):
|
|
|
continue
|
|
|
-
|
|
|
- item['href'] = detail_js
|
|
|
'''发布标题'''
|
|
|
node1 = element.find_element_by_xpath('./td[1]/a')
|
|
|
title = node1.text
|
|
@@ -163,11 +161,20 @@ def crawl_spider(crawl_max_page=1, enable_proxy=False, **kw):
|
|
|
'''访问详情页'''
|
|
|
goto(browser, node1, wait_time=2)
|
|
|
'''详情页'''
|
|
|
+ detail_url = 'http://www.cebpubservice.com/ctpsp_iiss/searchbusinesstypebeforedooraction/showDetails.do'
|
|
|
if detail_js.startswith('showDetails') is False:
|
|
|
- item = crawl_psp_frame(browser, main_handler, item)
|
|
|
+ try:
|
|
|
+ item = crawl_psp_frame(browser, main_handler, item)
|
|
|
+ item['href'] = '#'
|
|
|
+ item['competehref'] = detail_url
|
|
|
+ except NoSuchElementException:
|
|
|
+ logger.error('[加载超时]frame框架加载失败')
|
|
|
+ continue
|
|
|
else:
|
|
|
try:
|
|
|
item = crawl_show_details(browser, main_handler, item)
|
|
|
+ item['href'] = '#'
|
|
|
+ item['competehref'] = '{}/{}'.format(detail_url, sign)
|
|
|
except (ValueError, WebDriverException) as e:
|
|
|
exit_crawl = True
|
|
|
if e.__class__.__name__ == 'ValueError':
|
|
@@ -184,6 +191,8 @@ def crawl_spider(crawl_max_page=1, enable_proxy=False, **kw):
|
|
|
if '_id' in item:
|
|
|
del item['_id']
|
|
|
logger.info(f'[采集成功-{item["channel"]}]{title} - {publish_time}')
|
|
|
+ '''备注:详情页访问参数'''
|
|
|
+ item['remark'] = detail_js
|
|
|
'''添加数据指纹'''
|
|
|
r.hset(redis_key, sign, '')
|
|
|
'''保存列表'''
|