1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495 |
- from pymongo import MongoClient
- from datetime import datetime, timedelta
- # 连接到MongoDB
- client = MongoClient("mongodb://192.168.3.149:27180/")
- db = client['data_quality']
- collection = db['bid_analysis']
- # 定义两个任意时间段的边界
- # 时间段1
- period1_start = int(datetime(2025, 1, 20, 0, 0, 0).timestamp()) # 2025-01-20 00:00:00
- period1_end = int(datetime(2025, 1, 20, 23, 59, 59).timestamp()) # 2025-01-20 23:59:59
- # 时间段2
- period2_start = int(datetime(2025, 1, 21, 0, 0, 0).timestamp()) # 2025-01-21 00:00:00
- period2_end = int(datetime(2025, 1, 21, 23, 59, 59).timestamp()) # 2025-01-21 23:59:59
- # 聚合查询:计算两个时间段的数量和波动率
- pipeline = [
- {
- "$project": {
- "spidercode": 1,
- "create_time": 1,
- "date": {"$toDate": {"$multiply": ["$create_time", 1000]}} # 转换为毫秒级日期
- }
- },
- {
- "$group": {
- "_id": "$spidercode",
- "total_count": {"$sum": 1},
- "period1_count": {
- "$sum": {
- "$cond": [
- {"$and": [
- {"$gte": ["$create_time", period1_start]},
- {"$lte": ["$create_time", period1_end]}
- ]},
- 1,
- 0
- ]
- }
- },
- "period2_count": {
- "$sum": {
- "$cond": [
- {"$and": [
- {"$gte": ["$create_time", period2_start]},
- {"$lte": ["$create_time", period2_end]}
- ]},
- 1,
- 0
- ]
- }
- }
- }
- },
- {
- "$project": {
- "spidercode": "$_id",
- "total_count": 1,
- "period1_count": 1,
- "period2_count": 1,
- "volatility": {
- "$cond": [
- {"$eq": ["$period1_count", 0]},
- None, # 如果 Period1 Count 为 0,则波动率为 None
- {
- "$divide": [
- {"$subtract": ["$period2_count", "$period1_count"]},
- "$period1_count"
- ]
- }
- ]
- }
- }
- }
- ]
- # 执行聚合查询
- result = collection.aggregate(pipeline)
- # 打印结果
- for doc in result:
- print(doc)
- '''
- 示例输出:
- {
- "spidercode": "spider1",
- "total_count": 200,
- "period1_count": 60,
- "period2_count": 70,
- "volatility": 0.1667
- }
- '''
|