|
@@ -0,0 +1,66 @@
|
|
|
+import pandas as pd
|
|
|
+import cpca
|
|
|
+import matplotlib.pyplot as plt
|
|
|
+import openpyxl
|
|
|
+
|
|
|
# Province-extraction quality check.
#
# Reads a workbook with a 'title' and an 'area' column, asks cpca for the
# province mentioned in each title and in each area label, marks a row as
# correct when the two provinces are equal, prints a per-row trace, reports
# accuracy / error rate, shows a bar chart and exports the annotated rows.

# Read the Excel file and extract the titles.
filename = 'C:\\Users\\25503\\PycharmProjects\\pythonProject\\dataqa\\2023-07-08.xlsx'

df = pd.read_excel(filename)

# Fail early with a message that names the file and the missing column(s);
# a plain df['title'] lookup would raise the same exception type with less context.
missing_columns = [column for column in ('title', 'area') if column not in df.columns]
if missing_columns:
    raise KeyError(f"{filename} is missing required column(s): {missing_columns}")

# Blank cells are read by pandas as float NaN and numeric cells as numbers.
# NOTE(review): cpca is assumed to require str input - normalising here so a
# single blank cell cannot abort the whole run; confirm against cpca's docs.
titles = df['title'].fillna('').astype(str).tolist()
areas = df['area'].fillna('').astype(str).tolist()

# Use cpca to extract the province from each title and from each 'area' value.
# One batched call per column replaces two single-item calls per row.
# The empty-input guard avoids relying on how cpca handles an empty list.
provinces = cpca.transform(titles)['省'].tolist() if titles else []
area_provinces = cpca.transform(areas)['省'].tolist() if areas else []

# Compare row by row. Titles and areas are walked in lockstep so that rows
# sharing the same title are each compared with their OWN area (looking the
# area up by title text would always pick the first matching row).
# NOTE(review): when cpca finds no province in either text, both sides hold the
# same "nothing found" value and the row is counted as correct - confirm that
# this is the intended scoring.
correct_flags = []
for title, area, province, area_province in zip(titles, areas, provinces, area_provinces):
    correct = province == area_province
    correct_flags.append(correct)

    # Print the labelling trace for this row.
    print(f"Title: {title}")
    print(f"Predicted Province: {province}")
    print(f"Area: {area}")
    print(f"Area Province: {area_province}")
    print(f"Correct: {correct}")
    print("-" * 30)

df['predicted_province'] = provinces
df['correct'] = correct_flags

# Compute the accuracy and the error rate.
total_count = len(df)
correct_count = int(df['correct'].sum())
error_count = total_count - correct_count

if total_count:
    accuracy = correct_count / total_count
    error_rate = error_count / total_count
else:
    # No rows: the rates are undefined, so report NaN instead of dividing by zero.
    accuracy = error_rate = float('nan')

print(f"Total Count: {total_count}")
print(f"Correct Count: {correct_count}")
print(f"Error Count: {error_count}")
print(f"Accuracy: {accuracy}")
print(f"Error Rate: {error_rate}")

# Draw the result-analysis chart.
labels = ['Correct', 'Error']
counts = [correct_count, error_count]

plt.bar(labels, counts)
plt.xlabel('Result')
plt.ylabel('Count')
plt.title('Result Analysis')
plt.show()

# Export the annotated data to an Excel file.
output_filename = 'data.xlsx'

# The 'with' statement saves the workbook and closes the file automatically.
with pd.ExcelWriter(output_filename, engine='openpyxl') as writer:
    df.to_excel(writer, index=False)
|