@@ -201,7 +201,7 @@ class DetailSpider:
                     {"_id": item["_id"]},
                     {'$set': {'crawl_status': 'finished'}}
                 )
-                sc.update_count(1)
+                sc.crawl_counter(1)
             except JyBasicException as e:
                 if e.code == 10105:
                     '''When this exception is detected, the program updates the crawl table with the ES query results'''
@@ -217,16 +217,14 @@ class DetailSpider:
                         {'$set': {'crawl_status': 'error'}}
                     )
                     logger.info('[问题数据]{}-{}'.format(item['title'], item['publishtime']))
-                    sc.update_count(0)
+                    sc.crawl_counter(0)
             finally:
                 self.update_crawl_status(item, False)
                 sc.wait_for_next_task()

     def start(self):
-        query = {'used': False, 'site': '元博网', 'class': 'detail'}
         while True:
-            with Scheduler(query) as scheduler:
-                scheduler.crawl_type = 'detail'
+            with Scheduler(site='元博网', crawl_type='detail') as scheduler:
                 if scheduler.crawl_start:
                     self.user = scheduler.user
                     finished = self.crawl_spider(scheduler)