|
@@ -0,0 +1,55 @@
|
|
|
|
+from pymongo import MongoClient
|
|
|
|
+
|
|
|
|
# MongoDB connection settings and the two snapshot collections to compare.
MONGO_URI = "mongodb://192.168.3.149:27180/"
DATABASE_NAME = "data_quality"
BASELINE_COLLECTION = "bidding_20250117"  # reported as "table1"
CURRENT_COLLECTION = "bidding_20250123"  # reported as "table2"


def count_by_spidercode(collection):
    """Return a ``{spidercode: document count}`` dict for *collection*.

    Args:
        collection: An object exposing ``aggregate(pipeline)`` that yields
            documents with ``_id`` and ``count`` keys (a pymongo collection).

    Returns:
        A dict mapping each distinct ``spidercode`` value to the number of
        documents carrying it.
    """
    # One $group stage: bucket documents by spidercode and count each bucket.
    pipeline = [{"$group": {"_id": "$spidercode", "count": {"$sum": 1}}}]
    return {doc["_id"]: doc["count"] for doc in collection.aggregate(pipeline)}


def compute_volatility(baseline_count, current_count):
    """Return the relative change from *baseline_count* to *current_count*.

    The result is ``(current - baseline) / baseline`` rounded to two decimal
    places, or ``None`` when the baseline is 0 because the ratio is undefined.
    """
    if baseline_count == 0:
        return None
    return round((current_count - baseline_count) / baseline_count, 2)


def compare_counts(baseline_counts, current_counts):
    """Yield one report row per spidercode found in either count mapping.

    Rows for the baseline's spidercodes come first, in the baseline's own
    order, followed by spidercodes that exist only in *current_counts*.
    A spidercode missing from one side is counted as 0 on that side.

    Args:
        baseline_counts: ``{spidercode: count}`` for the first collection.
        current_counts: ``{spidercode: count}`` for the second collection.

    Yields:
        Dicts with the keys ``spidercode``, ``table1_count``,
        ``table2_count`` and ``volatility``.
    """
    # Spidercodes that only exist in the second collection would otherwise be
    # dropped from the report, so append them after the baseline's codes.
    only_in_current = [
        code for code in current_counts if code not in baseline_counts
    ]
    for code in list(baseline_counts) + only_in_current:
        baseline = baseline_counts.get(code, 0)
        current = current_counts.get(code, 0)
        yield {
            "spidercode": code,
            "table1_count": baseline,
            "table2_count": current,
            "volatility": compute_volatility(baseline, current),
        }


def main():
    """Print the per-spidercode count change between the two collections."""
    # The context manager closes the client's connections, even on error.
    with MongoClient(MONGO_URI) as client:
        db = client[DATABASE_NAME]
        baseline_counts = count_by_spidercode(db[BASELINE_COLLECTION])
        current_counts = count_by_spidercode(db[CURRENT_COLLECTION])

    for row in compare_counts(baseline_counts, current_counts):
        print(row)


if __name__ == "__main__":
    main()
|