import pandas as pd from pymongo import MongoClient # MongoDB连接配置 host = '192.168.3.149' # MongoDB主机地址 port = 27180 # MongoDB端口 dbname = 'data_quality' # 数据库名称 collection_name = 'bidding_20241219_ai' # 集合名称 # 创建MongoDB连接 client = MongoClient(host, port) db = client[dbname] collection = db[collection_name] # 只查询 "score" 字段,减少不必要的数据传输 data = pd.DataFrame(list(collection.find({}, {'score': 1}).batch_size(100))) # 确保将 "score" 字段转换为浮点数 if "score" in data.columns: data['score'] = data['score'].astype(float) # 统计标讯总分数的分布 score_counts = data['score'].value_counts().sort_index() total_scores = len(data['score']) # 计算百分比 score_percentages = (score_counts / total_scores) * 100 # 创建分布结果的 DataFrame score_distribution_df = pd.DataFrame({ '标讯总分数': score_counts.index, '数量': score_counts.values, '百分比': score_percentages.values }) # 百分比格式化为字符串,并附加百分号 score_distribution_df['百分比'] = score_distribution_df['百分比'].apply(lambda x: f'{x:.2f}%') # 添加总量列 score_distribution_df['总量'] = total_scores # 对分数进行倒序排序 score_distribution_df = score_distribution_df.sort_values(by='标讯总分数', ascending=False) # 输出到 Excel 记录 with pd.ExcelWriter('标讯总分数统计.xlsx', engine='openpyxl') as writer: score_distribution_df.to_excel(writer, sheet_name='标讯总分数分布', index=False) print("标讯总分数统计已保存到 '标讯总分数统计.xlsx' 文件中。")