12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849 |
- import pandas as pd
- from pymongo import MongoClient
- # MongoDB连接配置
- host = '192.168.3.149' # MongoDB主机地址
- port = 27180 # MongoDB端口
- dbname = 'data_quality' # 数据库名称
- collection_name = 'bidding_20241219_ai' # 集合名称
- # 创建MongoDB连接
- client = MongoClient(host, port)
- db = client[dbname]
- collection = db[collection_name]
- # 只查询 "score" 字段,减少不必要的数据传输
- data = pd.DataFrame(list(collection.find({}, {'score': 1}).batch_size(100)))
- # 确保将 "score" 字段转换为浮点数
- if "score" in data.columns:
- data['score'] = data['score'].astype(float)
- # 统计标讯总分数的分布
- score_counts = data['score'].value_counts().sort_index()
- total_scores = len(data['score'])
- # 计算百分比
- score_percentages = (score_counts / total_scores) * 100
- # 创建分布结果的 DataFrame
- score_distribution_df = pd.DataFrame({
- '标讯总分数': score_counts.index,
- '数量': score_counts.values,
- '百分比': score_percentages.values
- })
- # 百分比格式化为字符串,并附加百分号
- score_distribution_df['百分比'] = score_distribution_df['百分比'].apply(lambda x: f'{x:.2f}%')
- # 添加总量列
- score_distribution_df['总量'] = total_scores
- # 对分数进行倒序排序
- score_distribution_df = score_distribution_df.sort_values(by='标讯总分数', ascending=False)
- # 输出到 Excel 文件
- with pd.ExcelWriter('标讯总分数统计.xlsx', engine='openpyxl') as writer:
- score_distribution_df.to_excel(writer, sheet_name='标讯总分数分布', index=False)
- print("标讯总分数统计已保存到 '标讯总分数统计.xlsx' 文件中。")
|