|
@@ -189,13 +189,11 @@ class DetailSpider:
|
|
|
item["comeintime"] = int2long(int(time.time()))
|
|
|
'''检查清洗之后的详情'''
|
|
|
CheckText(item["detail"])
|
|
|
- del item['count'], item['crawl']
|
|
|
- if 'crawl_status' in item:
|
|
|
- del item['crawl_status']
|
|
|
- try:
|
|
|
- self.save_tab.insert_one(item)
|
|
|
- except DuplicateKeyError:
|
|
|
- pass
|
|
|
+ insert = {}
|
|
|
+ for key, val in item.items():
|
|
|
+ if key not in ['crawl_status', 'crawl', 'count', '_id']:
|
|
|
+ insert[key] = val
|
|
|
+ self.save_tab.insert_one(insert)
|
|
|
logger.info('[采集成功]{}-{}'.format(item['title'], item['publishtime']))
|
|
|
|
|
|
def crawl_spider(self, sc: Scheduler):
|