|
@@ -113,7 +113,7 @@ class ListSpider:
|
|
|
"l_np_publishtime": int2long(int(time.mktime(time.strptime(publish_time, "%Y-%m-%d")))),
|
|
|
}
|
|
|
if title is None:
|
|
|
- raise CustomCheckError(code=10107, reason='发布标题解析空值错误')
|
|
|
+ raise CustomCheckError(code=10107, reason='发布标题为空')
|
|
|
item['count'] = es_query(item["title"], item["l_np_publishtime"])
|
|
|
item['crawl'] = False
|
|
|
# print(f'>>> {title} - {competehref}')
|
|
@@ -173,16 +173,15 @@ class ListSpider:
|
|
|
crawl_total += 1
|
|
|
except JyBasicException as e:
|
|
|
sc.err_record(e)
|
|
|
-
|
|
|
+ logger.info(f'[采集失败]{menu.channel}-{region_name}-第{page}页-0条')
|
|
|
sc.wait_for_next_task(random.choice(range(2, 8)))
|
|
|
self.session.close()
|
|
|
|
|
|
def start(self):
|
|
|
- query = {'used': False, 'site': '元博网', 'class': 'list'}
|
|
|
- with Scheduler(query) as scheduler:
|
|
|
- scheduler.crawl_type = 'list'
|
|
|
- if scheduler.crawl_start:
|
|
|
- for menu in self.crawl_menus:
|
|
|
+ for menu in self.crawl_menus:
|
|
|
+ with Scheduler({'site': '元博网'}) as scheduler:
|
|
|
+ scheduler.crawl_type = 'list'
|
|
|
+ if scheduler.crawl_start:
|
|
|
self.user = scheduler.user
|
|
|
while True:
|
|
|
try:
|