score_ai.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. import pandas as pd
  2. from pymongo import MongoClient
  3. # MongoDB连接配置
  4. host = '192.168.3.149' # MongoDB主机地址
  5. port = 27180 # MongoDB端口
  6. dbname = 'data_quality' # 数据库名称
  7. collection_name = 'bidding_20241219_ai' # 集合名称
  8. # 创建MongoDB连接
  9. client = MongoClient(host, port)
  10. db = client[dbname]
  11. collection = db[collection_name]
  12. # 只查询 "score" 字段,减少不必要的数据传输
  13. data = pd.DataFrame(list(collection.find({}, {'score': 1}).batch_size(100)))
  14. # 确保将 "score" 字段转换为浮点数
  15. if "score" in data.columns:
  16. data['score'] = data['score'].astype(float)
  17. # 统计标讯总分数的分布
  18. score_counts = data['score'].value_counts().sort_index()
  19. total_scores = len(data['score'])
  20. # 计算百分比
  21. score_percentages = (score_counts / total_scores) * 100
  22. # 创建分布结果的 DataFrame
  23. score_distribution_df = pd.DataFrame({
  24. '标讯总分数': score_counts.index,
  25. '数量': score_counts.values,
  26. '百分比': score_percentages.values
  27. })
  28. # 百分比格式化为字符串,并附加百分号
  29. score_distribution_df['百分比'] = score_distribution_df['百分比'].apply(lambda x: f'{x:.2f}%')
  30. # 添加总量列
  31. score_distribution_df['总量'] = total_scores
  32. # 对分数进行倒序排序
  33. score_distribution_df = score_distribution_df.sort_values(by='标讯总分数', ascending=False)
  34. # 输出到 Excel 文件
  35. with pd.ExcelWriter('标讯总分数统计.xlsx', engine='openpyxl') as writer:
  36. score_distribution_df.to_excel(writer, sheet_name='标讯总分数分布', index=False)
  37. print("标讯总分数统计已保存到 '标讯总分数统计.xlsx' 文件中。")