import pandas as pd
from pymongo import MongoClient

# MongoDB连接配置
host = '192.168.3.149'  # MongoDB主机地址
port = 27180  # MongoDB端口
dbname = 'data_quality'  # 数据库名称
collection_name = 'bidding_20241219_ai'  # 集合名称

# 创建MongoDB连接
client = MongoClient(host, port)
db = client[dbname]
collection = db[collection_name]

# 只查询 "score" 字段，减少不必要的数据传输
data = pd.DataFrame(list(collection.find({}, {'score': 1}).batch_size(100)))

# 确保将 "score" 字段转换为浮点数
if "score" in data.columns:
    data['score'] = data['score'].astype(float)

    # 统计标讯总分数的分布
    score_counts = data['score'].value_counts().sort_index()
    total_scores = len(data['score'])

    # 计算百分比
    score_percentages = (score_counts / total_scores) * 100

    # 创建分布结果的 DataFrame
    score_distribution_df = pd.DataFrame({
        '标讯总分数': score_counts.index,
        '数量': score_counts.values,
        '百分比': score_percentages.values
    })

    # 百分比格式化为字符串，并附加百分号
    score_distribution_df['百分比'] = score_distribution_df['百分比'].apply(lambda x: f'{x:.2f}%')

    # 添加总量列
    score_distribution_df['总量'] = total_scores

    # 对分数进行倒序排序
    score_distribution_df = score_distribution_df.sort_values(by='标讯总分数', ascending=False)

    # 输出到 Excel 记录
    with pd.ExcelWriter('标讯总分数统计.xlsx', engine='openpyxl') as writer:
        score_distribution_df.to_excel(writer, sheet_name='标讯总分数分布', index=False)

    print("标讯总分数统计已保存到 '标讯总分数统计.xlsx' 文件中。")