liumiaomiao 6 月之前
父节点
当前提交
18244c2106
共有 1 个文件被更改,包括 95 次插入0 次删除
  1. 95 0
      tools/波动率计算/一份数据基于时间段计算波动率.py

+ 95 - 0
tools/波动率计算/一份数据基于时间段计算波动率.py

@@ -0,0 +1,95 @@
+from pymongo import MongoClient
+from datetime import datetime, timedelta
+
+# 连接到MongoDB
+client = MongoClient("mongodb://192.168.3.149:27180/")
+db = client['data_quality']
+collection = db['bid_analysis']
+
+# 定义两个任意时间段的边界
+# 时间段1
+period1_start = int(datetime(2025, 1, 23, 0, 0, 0).timestamp())  # 2025-01-20 00:00:00
+period1_end = int(datetime(2025, 1, 23, 23, 59, 59).timestamp())  # 2025-01-20 23:59:59
+
+# 时间段2
+period2_start = int(datetime(2025, 1, 24, 0, 0, 0).timestamp())  # 2025-01-21 00:00:00
+period2_end = int(datetime(2025, 1, 24, 23, 59, 59).timestamp())  # 2025-01-21 23:59:59
+
+# 聚合查询:计算两个时间段的数量和波动率
+pipeline = [
+    {
+        "$project": {
+            "spidercode": 1,
+            "create_time": 1,
+            "date": {"$toDate": {"$multiply": ["$create_time", 1000]}}  # 转换为毫秒级日期
+        }
+    },
+    {
+        "$group": {
+            "_id": "$spidercode",
+            "total_count": {"$sum": 1},
+            "period1_count": {
+                "$sum": {
+                    "$cond": [
+                        {"$and": [
+                            {"$gte": ["$create_time", period1_start]},
+                            {"$lte": ["$create_time", period1_end]}
+                        ]},
+                        1,
+                        0
+                    ]
+                }
+            },
+            "period2_count": {
+                "$sum": {
+                    "$cond": [
+                        {"$and": [
+                            {"$gte": ["$create_time", period2_start]},
+                            {"$lte": ["$create_time", period2_end]}
+                        ]},
+                        1,
+                        0
+                    ]
+                }
+            }
+        }
+    },
+    {
+        "$project": {
+            "spidercode": "$_id",
+            "total_count": 1,
+            "period1_count": 1,
+            "period2_count": 1,
+            "volatility": {
+                "$cond": [
+                    {"$eq": ["$period1_count", 0]},
+                    None,  # 如果 Period1 Count 为 0,则波动率为 None
+                    {
+                        "$divide": [
+                            {"$subtract": ["$period2_count", "$period1_count"]},
+                            "$period1_count"
+                        ]
+                    }
+                ]
+            }
+        }
+    }
+]
+
+# 执行聚合查询
+result = collection.aggregate(pipeline)
+
+# 打印结果
+for doc in result:
+    print(doc)
+
+'''
+示例输出:
+{
+    "spidercode": "spider1",
+    "total_count": 200,
+    "period1_count": 60,
+    "period2_count": 70,
+    "volatility": 0.1667
+}
+'''