from pymongo import MongoClient from datetime import datetime, timedelta # 连接到MongoDB client = MongoClient("mongodb://192.168.3.149:27180/") db = client['data_quality'] collection = db['bid_analysis'] # 定义两个任意时间段的边界 # 时间段1 period1_start = int(datetime(2025, 1, 20, 0, 0, 0).timestamp()) # 2025-01-20 00:00:00 period1_end = int(datetime(2025, 1, 20, 23, 59, 59).timestamp()) # 2025-01-20 23:59:59 # 时间段2 period2_start = int(datetime(2025, 1, 21, 0, 0, 0).timestamp()) # 2025-01-21 00:00:00 period2_end = int(datetime(2025, 1, 21, 23, 59, 59).timestamp()) # 2025-01-21 23:59:59 # 聚合查询:计算两个时间段的数量和波动率 pipeline = [ { "$project": { "spidercode": 1, "create_time": 1, "date": {"$toDate": {"$multiply": ["$create_time", 1000]}} # 转换为毫秒级日期 } }, { "$group": { "_id": "$spidercode", "total_count": {"$sum": 1}, "period1_count": { "$sum": { "$cond": [ {"$and": [ {"$gte": ["$create_time", period1_start]}, {"$lte": ["$create_time", period1_end]} ]}, 1, 0 ] } }, "period2_count": { "$sum": { "$cond": [ {"$and": [ {"$gte": ["$create_time", period2_start]}, {"$lte": ["$create_time", period2_end]} ]}, 1, 0 ] } } } }, { "$project": { "spidercode": "$_id", "total_count": 1, "period1_count": 1, "period2_count": 1, "volatility": { "$cond": [ {"$eq": ["$period1_count", 0]}, None, # 如果 Period1 Count 为 0,则波动率为 None { "$divide": [ {"$subtract": ["$period2_count", "$period1_count"]}, "$period1_count" ] } ] } } } ] # 执行聚合查询 result = collection.aggregate(pipeline) # 打印结果 for doc in result: print(doc) ''' 示例输出: { "spidercode": "spider1", "total_count": 200, "period1_count": 60, "period2_count": 70, "volatility": 0.1667 } '''