|
@@ -0,0 +1,95 @@
|
|
|
|
+from pymongo import MongoClient
|
|
|
|
+from datetime import datetime, timedelta
|
|
|
|
+
|
|
|
|
+# 连接到MongoDB
|
|
|
|
+client = MongoClient("mongodb://192.168.3.149:27180/")
|
|
|
|
+db = client['data_quality']
|
|
|
|
+collection = db['bid_analysis']
|
|
|
|
+
|
|
|
|
+# 定义两个任意时间段的边界
|
|
|
|
+# 时间段1
|
|
|
|
+period1_start = int(datetime(2025, 1, 23, 0, 0, 0).timestamp()) # 2025-01-20 00:00:00
|
|
|
|
+period1_end = int(datetime(2025, 1, 23, 23, 59, 59).timestamp()) # 2025-01-20 23:59:59
|
|
|
|
+
|
|
|
|
+# 时间段2
|
|
|
|
+period2_start = int(datetime(2025, 1, 24, 0, 0, 0).timestamp()) # 2025-01-21 00:00:00
|
|
|
|
+period2_end = int(datetime(2025, 1, 24, 23, 59, 59).timestamp()) # 2025-01-21 23:59:59
|
|
|
|
+
|
|
|
|
+# 聚合查询:计算两个时间段的数量和波动率
|
|
|
|
+pipeline = [
|
|
|
|
+ {
|
|
|
|
+ "$project": {
|
|
|
|
+ "spidercode": 1,
|
|
|
|
+ "create_time": 1,
|
|
|
|
+ "date": {"$toDate": {"$multiply": ["$create_time", 1000]}} # 转换为毫秒级日期
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "$group": {
|
|
|
|
+ "_id": "$spidercode",
|
|
|
|
+ "total_count": {"$sum": 1},
|
|
|
|
+ "period1_count": {
|
|
|
|
+ "$sum": {
|
|
|
|
+ "$cond": [
|
|
|
|
+ {"$and": [
|
|
|
|
+ {"$gte": ["$create_time", period1_start]},
|
|
|
|
+ {"$lte": ["$create_time", period1_end]}
|
|
|
|
+ ]},
|
|
|
|
+ 1,
|
|
|
|
+ 0
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "period2_count": {
|
|
|
|
+ "$sum": {
|
|
|
|
+ "$cond": [
|
|
|
|
+ {"$and": [
|
|
|
|
+ {"$gte": ["$create_time", period2_start]},
|
|
|
|
+ {"$lte": ["$create_time", period2_end]}
|
|
|
|
+ ]},
|
|
|
|
+ 1,
|
|
|
|
+ 0
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "$project": {
|
|
|
|
+ "spidercode": "$_id",
|
|
|
|
+ "total_count": 1,
|
|
|
|
+ "period1_count": 1,
|
|
|
|
+ "period2_count": 1,
|
|
|
|
+ "volatility": {
|
|
|
|
+ "$cond": [
|
|
|
|
+ {"$eq": ["$period1_count", 0]},
|
|
|
|
+ None, # 如果 Period1 Count 为 0,则波动率为 None
|
|
|
|
+ {
|
|
|
|
+ "$divide": [
|
|
|
|
+ {"$subtract": ["$period2_count", "$period1_count"]},
|
|
|
|
+ "$period1_count"
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+]
|
|
|
|
+
|
|
|
|
+# 执行聚合查询
|
|
|
|
+result = collection.aggregate(pipeline)
|
|
|
|
+
|
|
|
|
+# 打印结果
|
|
|
|
+for doc in result:
|
|
|
|
+ print(doc)
|
|
|
|
+
|
|
|
|
+'''
|
|
|
|
+示例输出:
|
|
|
|
+{
|
|
|
|
+ "spidercode": "spider1",
|
|
|
|
+ "total_count": 200,
|
|
|
|
+ "period1_count": 60,
|
|
|
|
+ "period2_count": 70,
|
|
|
|
+ "volatility": 0.1667
|
|
|
|
+}
|
|
|
|
+'''
|