liumiaomiao 6 月之前
父節點
當前提交
2d1403f48c

二進制
tools/分数字段结果分析/mongo_data_statistics_combined1.xlsx


+ 0 - 0
tools/分数字段结果分析/DataExport_forTesting.py → tools/周报表格导出/DataExport_forTesting.py


+ 0 - 0
tools/波动率计算/一份数据基于时间段计算波动率.py


+ 55 - 0
tools/波动率计算/两份样本数据以站点为分组波动率输出.py

@@ -0,0 +1,55 @@
+from pymongo import MongoClient
+
+# Connect to MongoDB (address is hard-coded). NOTE(review): the client is never closed explicitly.
+client = MongoClient("mongodb://192.168.3.149:27180/")
+db = client['data_quality']
+
+# The two collections to compare, referred to below as table1 (the baseline) and table2.
+table1_collection = db["bidding_20250117"]
+table2_collection = db["bidding_20250123"]
+
+# Aggregation pipeline: count the documents in table1 for each distinct `site` value.
+pipeline_table1 = [
+    {
+        "$group": {
+            "_id": "$site",
+            "table1_count": {"$sum": 1}
+        }
+    }
+]
+
+# Aggregation pipeline: count the documents in table2 for each distinct `site` value.
+pipeline_table2 = [
+    {
+        "$group": {
+            "_id": "$site",
+            "table2_count": {"$sum": 1}
+        }
+    }
+]
+
+# Run both aggregations; each yields one document per group holding `_id` and the count field.
+table1_result = table1_collection.aggregate(pipeline_table1)
+table2_result = table2_collection.aggregate(pipeline_table2)
+
+# Materialise the results as {site: count} dicts so the two samples can be compared by key.
+table1_counts = {doc["_id"]: doc["table1_count"] for doc in table1_result}
+table2_counts = {doc["_id"]: doc["table2_count"] for doc in table2_result}
+
+# Compare per-site counts. NOTE(review): only sites present in table1 are visited, so a site that exists only in table2 is never reported.
+for site, table1_count in table1_counts.items():
+    table2_count = table2_counts.get(site, 0)
+
+    # Volatility = (table2 - table1) / table1, rounded to 2 decimals; a site missing from table2 counts as 0 and yields -1.0.
+    if table1_count == 0:
+        volatility = None  # undefined for a zero baseline (NOTE(review): $group counts are >= 1, so this looks unreachable)
+    else:
+        volatility = round((table2_count - table1_count) / table1_count,2)
+
+    # Print one result dict per site.
+    print({
+        "site": site,
+        "table1_count": table1_count,
+        "table2_count": table2_count,
+        "volatility": volatility
+    })

+ 55 - 0
tools/波动率计算/两份样本数据基于爬虫代码输出波动率.py

@@ -0,0 +1,55 @@
+from pymongo import MongoClient
+
+# Connect to MongoDB (address is hard-coded). NOTE(review): the client is never closed explicitly.
+client = MongoClient("mongodb://192.168.3.149:27180/")
+db = client['data_quality']
+
+# The two collections to compare, referred to below as table1 (the baseline) and table2.
+table1_collection = db["bidding_20250117"]
+table2_collection = db["bidding_20250123"]
+
+# Aggregation pipeline: count the documents in table1 for each distinct `spidercode` value.
+pipeline_table1 = [
+    {
+        "$group": {
+            "_id": "$spidercode",
+            "table1_count": {"$sum": 1}
+        }
+    }
+]
+
+# Aggregation pipeline: count the documents in table2 for each distinct `spidercode` value.
+pipeline_table2 = [
+    {
+        "$group": {
+            "_id": "$spidercode",
+            "table2_count": {"$sum": 1}
+        }
+    }
+]
+
+# Run both aggregations; each yields one document per group holding `_id` and the count field.
+table1_result = table1_collection.aggregate(pipeline_table1)
+table2_result = table2_collection.aggregate(pipeline_table2)
+
+# Materialise the results as {spidercode: count} dicts so the two samples can be compared by key.
+table1_counts = {doc["_id"]: doc["table1_count"] for doc in table1_result}
+table2_counts = {doc["_id"]: doc["table2_count"] for doc in table2_result}
+
+# Compare per-spidercode counts. NOTE(review): only spidercodes present in table1 are visited, so one that exists only in table2 is never reported.
+for spidercode, table1_count in table1_counts.items():
+    table2_count = table2_counts.get(spidercode, 0)
+
+    # Volatility = (table2 - table1) / table1, rounded to 2 decimals; a spidercode missing from table2 counts as 0 and yields -1.0.
+    if table1_count == 0:
+        volatility = None  # undefined for a zero baseline (NOTE(review): $group counts are >= 1, so this looks unreachable)
+    else:
+        volatility = round((table2_count - table1_count) / table1_count,2)
+
+    # Print one result dict per spidercode.
+    print({
+        "spidercode": spidercode,
+        "table1_count": table1_count,
+        "table2_count": table2_count,
+        "volatility": volatility
+    })