# Import the required libraries
from datetime import datetime

import numpy
import pandas as pd
from openpyxl import load_workbook
from pymongo import MongoClient


# Place this function definition at the top of your script, near the other function definitions.
def convert_numpy_int(obj):
    if isinstance(obj, numpy.int64):
        return int(obj)
    # (assumption: the middle of this function was elided in the source; recursing
    # into dicts and lists is inferred, since the function is applied to dicts below)
    elif isinstance(obj, dict):
        return {key: convert_numpy_int(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [convert_numpy_int(item) for item in obj]
    else:
        return obj
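
# Sanity check: pymongo's BSON encoder rejects numpy scalars (insert_one raises
# bson.errors.InvalidDocument), which is what the conversion above works around.
# Illustrative record; numpy ints typically leak in via pandas results:
_demo_record = {'score': 100, 'score_number': numpy.int64(42)}
assert type(convert_numpy_int(_demo_record)['score_number']) is int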


# MongoDB connection settings
host = '192.168.3.149'  # MongoDB host address
port = 27180  # MongoDB port
dbname = 'data_quality'  # database name
collection_name = 'bidding_20241033'  # collection name

# Create the MongoDB connection
client = MongoClient(host, port)
db = client[dbname]
collection = db[collection_name]

# Read the data from MongoDB
data = pd.DataFrame(list(collection.find()))
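# Note: find() with no projection pulls every field of every document into
# memory. On a large collection, an optional projection over just the fields
# analysed below would keep the DataFrame small, e.g.:
#
#     fields = ['area_qa', 'bidamount_qa', 'budget_qa', 'buyer_qa',
#               'multipackage_qa', 'projectcode_qa', 'projectname_qa',
#               'title_qa', 'winner_qa', 'score', 'bidopentime_qa',
#               'publishtime_qa', 'toptype_qa', 'subtype_qa', 'purchasinglist']
#     data = pd.DataFrame(list(collection.find({}, dict.fromkeys(fields, 1))))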

# Map the raw field names to their Chinese display names
column_name_mapping = {
    "area_qa": "省份",
    "bidamount_qa": "中标金额",
    "budget_qa": "预算",
    "buyer_qa": "采购单位",
    "multipackage_qa": "分包",
    "projectcode_qa": "项目编号",
    "projectname_qa": "项目名称",
    "title_qa": "标题",
    "winner_qa": "中标单位",
    "score": "标讯总分数",
    "bidopentime_qa": "开标时间",
    "publishtime_qa": "发布时间",
    "toptype_qa": "信息一级分类",
    "subtype_qa": "信息二级分类"
}

# Rename the columns to their Chinese display names
data.rename(columns=column_name_mapping, inplace=True)

# Check that the expected columns exist, and print the current column names
print("当前的列名:")
print(data.columns)

# The columns you expect to check
expected_columns = ["信息一级分类", "信息二级分类"]

# Check each expected column in turn
for col in expected_columns:
    if col in data.columns:
        print(f"列 '{col}' 存在于数据框架中。")
        # When the column exists, print its value distribution
        print(f"{col} 字段值分布:")
        print(data[col].value_counts(dropna=False))
    else:
        print(f"警告:列 '{col}' 不在数据框架中。")

# Close the MongoDB connection
client.close()


# analyze_column: count correct vs. error values, treating NaN as correct
def analyze_column(dataframe, column_name):
    if column_name not in dataframe.columns:
        # When the field is absent, treat every record as correct
        total = len(dataframe)
        correct = total
        error = 0
        # No column means there are no error reasons to collect
        error_reasons = pd.Series(dtype=object)
    else:
        # For fields that exist, NaN and the empty dict {} count as correct;
        # anything else is an error
        total = len(dataframe[column_name])
        correct = dataframe[column_name].apply(lambda x: pd.isna(x) or x == {}).sum()
        error = total - correct
        # Collect the error reasons (the non-NaN, non-empty values)
        error_reasons = dataframe[column_name].apply(
            lambda x: x if x != {} and not pd.isna(x) else None).dropna().value_counts()

    accuracy = correct / total if total > 0 else 0
    error_rate = error / total if total > 0 else 0

    return total, correct, error, accuracy, error_rate, error_reasons
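
# For orientation, the counting convention on a toy frame (illustrative values):
# NaN and {} count as correct, anything else is an error.
_demo = pd.DataFrame({'省份': [float('nan'), {}, '编码异常,省份缺失']})
_total, _correct, _error, _acc, _rate, _reasons = analyze_column(_demo, '省份')
assert (_total, _correct, _error) == (3, 2, 1)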


# Reshape the error-reason structure produced by analyze_column
def reformat_error_reasons_safe(error_reasons_series):
    # Accumulate the reformatted error reasons, keyed by 1-tuples
    reformatted_reasons = {}

    # Walk each distinct error reason and its count
    for error_dict, count in error_reasons_series.items():
        if isinstance(error_dict, dict):  # dict-shaped reason: {code: "detail,message"}
            for error_code, reason in error_dict.items():
                # Keep only the part after the comma, when one is present
                if ',' in reason:
                    parts = reason.split(',')
                    formatted_reason = parts[1].strip()
                else:
                    formatted_reason = reason.strip()

                # Only non-empty reasons contribute to the tally
                if formatted_reason:
                    key = (formatted_reason,)
                    reformatted_reasons[key] = reformatted_reasons.get(key, 0) + count
        elif isinstance(error_dict, list):  # list-shaped reason
            key = (tuple(error_dict),) if error_dict else None
            reformatted_reasons[key] = reformatted_reasons.get(key, 0) + count
        else:  # any other reason type (strings, numbers, ...)
            key = (error_dict,) if error_dict else None
            reformatted_reasons[key] = reformatted_reasons.get(key, 0) + count

    # Build the final result, dropping None keys and empty-string keys
    formatted_results = {
        str(key[0]): value for key, value in reformatted_reasons.items() if key and key[0] != ''
    }
    return formatted_results
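
# A small worked example (illustrative reasons). value_counts can only yield
# hashable index entries, so the dict and list branches above are defensive;
# plain string reasons pass through whole and empty ones are dropped:
_raw_reasons = pd.Series({'中标金额为负': 3, '': 1})
assert reformat_error_reasons_safe(_raw_reasons) == {'中标金额为负': 3}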


# Analyse each field
fields_to_analyze = ["省份", "中标金额", "预算", "采购单位", "分包", "项目编号", "项目名称", "标题", "中标单位",
                     "开标时间", "发布时间", "信息一级分类", "信息二级分类"]
expanded_analysis_results = []

for col in fields_to_analyze:
    if col in data.columns:  # check the field exists before trying to analyse it
        total, correct, error, accuracy, error_rate, error_reasons = analyze_column(data, col)
        reformatted_error_reasons = reformat_error_reasons_safe(error_reasons)

        # One result row per distinct error reason (assumption: this loop header
        # was elided in the source and is reconstructed from the variables used below)
        for reason, count in reformatted_error_reasons.items():
            if error > 0:
                single_reason_error_rate = count / error
            else:
                single_reason_error_rate = 0  # guard against division by zero

            expanded_analysis_results.append({
                '字段': col,
                '总量': total,
                '正确数量': correct,
                '错误数量': error,
                '正确率': f'{accuracy:.2%}',
                '错误率': f'{error_rate:.2%}',
                '错误原因': reason,
                '错误次数': count,
                '单个原因错误率': f'{single_reason_error_rate:.2%}'
            })
    else:
        print(f"警告:列 '{col}' 不在数据框架中,将跳过此字段。")

# Collect the per-field results into a DataFrame
expanded_analysis_results_df = pd.DataFrame(expanded_analysis_results)

# Distribution of the "标讯总分数" field
if "标讯总分数" in data.columns:
    # Convert the scores to float
    data['标讯总分数'] = data['标讯总分数'].astype(float)
    score_counts = data['标讯总分数'].value_counts().sort_index()
    total_scores = len(data['标讯总分数'])
    score_percentages = (score_counts / total_scores) * 100
    score_distribution_df = pd.DataFrame({
        '标讯总分数': score_counts.index,
        '数量': score_counts.values,
        '百分比': score_percentages.values
    })
    # The scores are already floats after astype(float), so the types match;
    # count how many records scored exactly 100
    score_100_count = score_counts.get(100, 0)
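
    # Design note: value_counts(normalize=True) computes nearly the same shares
    # directly (it skips NaN, while the division by total_scores above uses the
    # full length), e.g.:
    #     data['标讯总分数'].value_counts(normalize=True).sort_index() * 100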

    # Connect to MongoDB for the score summary
    client = MongoClient('192.168.3.149', 27180)  # the configured host and port
    db = client['data_quality']  # the 'data_quality' database
    score_collection = db['score']  # the 'score' collection

    # Build the record to store in MongoDB
    data_to_store = {
        'score': 100,
        'score_number': score_100_count,
        'timestamp': datetime.now()  # attach the current timestamp
    }
    # Run it through convert_numpy_int so every value is in a MongoDB-compatible format
    data_to_store_converted = convert_numpy_int(data_to_store)
    # Store the converted record in the designated MongoDB collection
    score_collection.insert_one(data_to_store_converted)

    # Format the percentages as strings with a trailing percent sign
    score_distribution_df['百分比'] = score_distribution_df['百分比'].apply(lambda x: f'{x:.2f}%')


# Distribution of the "score" field nested under "purchasinglist"
if 'purchasinglist' in data.columns:
    # Extract the first package's "score" and convert it to float;
    # malformed cells fall back to 0
    purchasinglist_scores = data['purchasinglist'].map(
        lambda x: float(x[0]['score'])
        if isinstance(x, list) and x and isinstance(x[0], dict) and 'score' in x[0]
        else 0)
    # (assumption: this tally line was elided in the source and is reconstructed
    # from the variables used below)
    purchasinglist_score_counts = purchasinglist_scores.value_counts().sort_index()
    purchasinglist_total_scores = purchasinglist_scores.notnull().sum()
    purchasinglist_score_percentages = (purchasinglist_score_counts / purchasinglist_total_scores) * 100
    purchasinglist_score_distribution_df = pd.DataFrame({
        '标的物分数': purchasinglist_score_counts.index,
        '数量': purchasinglist_score_counts.values,
        '百分比': purchasinglist_score_percentages.values
    })
    # Format the percentages as strings with a trailing percent sign
    purchasinglist_score_distribution_df['百分比'] = purchasinglist_score_distribution_df['百分比'].apply(
        lambda x: f'{x:.2f}%')

# Sort the field results by error count, descending
expanded_analysis_results_df = expanded_analysis_results_df.sort_values(by='错误次数', ascending=False)

# MongoDB export settings
export_host = '192.168.3.149'  # MongoDB host address
export_port = 27180  # MongoDB port
export_dbname = 'data_quality'  # database name
export_collection_name = 'export'  # target collection for the export

# Create a MongoDB connection for exporting the results
export_client = MongoClient(export_host, export_port)
export_db = export_client[export_dbname]
export_collection = export_db[export_collection_name]

# Write the analysis results into MongoDB
for result in expanded_analysis_results:
    # Shape of one exported record
    export_entry = {
        'error_cause': result['错误原因'],
        'error_count': result['错误次数'],
        'timestamp': datetime.now()  # attach the current timestamp
    }
    print(export_entry)  # verify the timestamp is generated correctly

    # Apply the numpy-to-int conversion before inserting
    export_entry = convert_numpy_int(export_entry)

    # Insert the record into the MongoDB collection
    export_collection.insert_one(export_entry)

# Close the MongoDB connection used for the export
export_client.close()
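
# The loop above makes one network round trip per row; pymongo's insert_many
# batches them. An equivalent variant over the same records would be:
#
#     entries = [convert_numpy_int({
#         'error_cause': r['错误原因'],
#         'error_count': r['错误次数'],
#         'timestamp': datetime.now(),
#     }) for r in expanded_analysis_results]
#     if entries:  # insert_many raises on an empty list
#         export_collection.insert_many(entries)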


# Write the results out with pd.ExcelWriter
with pd.ExcelWriter('临时文件.xlsx', engine='openpyxl') as writer:
    # Pre-create a worksheet named "分数分析结果" so both tables land on it
    writer.sheets['分数分析结果'] = writer.book.create_sheet('分数分析结果')
    if "标讯总分数" in data.columns:
        # Add a total-count column
        score_distribution_df['总量'] = total_scores
        # Sort the scores in descending order
        score_distribution_df = score_distribution_df.sort_values(by='标讯总分数', ascending=False)
        score_distribution_df.to_excel(writer, sheet_name='分数分析结果', index=False)

    # Write the "purchasinglist" score distribution into new columns on the same sheet
    if 'purchasinglist' in data.columns and purchasinglist_scores.notnull().any():
        purchasinglist_score_distribution_df = purchasinglist_score_distribution_df.sort_values(
            by='标的物分数', ascending=False)
        # Add a total-count column
        purchasinglist_score_distribution_df['总量'] = purchasinglist_total_scores
        # Note the startcol argument: it is derived from the actual number of
        # columns in the first table, leaving a two-column gap
        purchasinglist_score_distribution_df.to_excel(writer, sheet_name='分数分析结果',
                                                      startcol=len(score_distribution_df.columns) + 2, index=False)

    expanded_analysis_results_df.to_excel(writer, sheet_name='字段分析结果', index=False)

# The analysis results are now saved in the temporary file
temp_analysis_file = '临时文件.xlsx'  # path to the temporary file

# Load the Excel file the results should be merged into
modified_file_path = 'pin.xlsx'  # path of the file to merge into
wb = load_workbook(modified_file_path)

# Load the temporary Excel file that holds the analysis results
temp_wb = load_workbook(temp_analysis_file)

# Copy the worksheets from the temporary file into the target workbook
for sheet_name in temp_wb.sheetnames:
    source = temp_wb[sheet_name]
    target = wb.create_sheet(sheet_name)
    for row in source.iter_rows(min_row=1, max_col=source.max_column, max_row=source.max_row, values_only=True):
        target.append(row)
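
# Two caveats on the copy above: iter_rows(values_only=True) copies cell values
# only (any styling in the temporary file is dropped), and create_sheet silently
# renames on a name collision (a second '分数分析结果' would become
# '分数分析结果1'). If pin.xlsx may already contain these sheets, drop the
# stale copies before the loop:
#
#     for sheet_name in temp_wb.sheetnames:
#         if sheet_name in wb.sheetnames:
#             wb.remove(wb[sheet_name])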

# Save the final merged file
final_merged_file_path = '质量分析报告.xlsx'  # path of the final merged file
wb.save(final_merged_file_path)