dongzhaorui@topnet.net.cn 3 жил өмнө
parent
commit
06b7b28ba5
1 өөрчлөгдсөн 6 нэмэгдсэн, 7 устгасан
  1. 6 7
      ybw/list_spider.py

+ 6 - 7
ybw/list_spider.py

@@ -113,7 +113,7 @@ class ListSpider:
                 "l_np_publishtime": int2long(int(time.mktime(time.strptime(publish_time, "%Y-%m-%d")))),
             }
             if title is None:
-                raise CustomCheckError(code=10107, reason='发布标题解析空值错误')
+                raise CustomCheckError(code=10107, reason='发布标题为空')
             item['count'] = es_query(item["title"], item["l_np_publishtime"])
             item['crawl'] = False
             # print(f'>>> {title} - {competehref}')
@@ -173,16 +173,15 @@ class ListSpider:
                         crawl_total += 1
                 except JyBasicException as e:
                     sc.err_record(e)
-
+                    logger.info(f'[采集失败]{menu.channel}-{region_name}-第{page}页-0条')
                 sc.wait_for_next_task(random.choice(range(2, 8)))
             self.session.close()
 
     def start(self):
-        query = {'used': False, 'site': '元博网', 'class': 'list'}
-        with Scheduler(query) as scheduler:
-            scheduler.crawl_type = 'list'
-            if scheduler.crawl_start:
-                for menu in self.crawl_menus:
+        for menu in self.crawl_menus:
+            with Scheduler({'site': '元博网'}) as scheduler:
+                scheduler.crawl_type = 'list'
+                if scheduler.crawl_start:
                     self.user = scheduler.user
                     while True:
                         try: