Update grace_eval.py
Browse files- grace_eval.py +20 -5
grace_eval.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1 |
-
from evaluate.visualization import radar_plot
|
2 |
import matplotlib.pyplot as plt
|
3 |
|
4 |
def compute_sample_scores(results, prompt):
|
5 |
-
#
|
6 |
return {
|
7 |
"sd_v1_5": {"G": 4, "R": 4, "A": 4, "E": 3},
|
8 |
"openjourney_v4": {"G": 3, "R": 4, "A": 5, "E": 3},
|
@@ -10,6 +9,22 @@ def compute_sample_scores(results, prompt):
|
|
10 |
}
|
11 |
|
12 |
def plot_radar(scores_dict, out_path="radar.png"):
|
13 |
-
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import matplotlib.pyplot as plt
|
2 |
|
3 |
def compute_sample_scores(results, prompt):
|
4 |
+
# Sample scores - a real application could use more elaborate evaluation logic
|
5 |
return {
|
6 |
"sd_v1_5": {"G": 4, "R": 4, "A": 4, "E": 3},
|
7 |
"openjourney_v4": {"G": 3, "R": 4, "A": 5, "E": 3},
|
|
|
9 |
}
|
10 |
|
11 |
def plot_radar(scores_dict, out_path="radar.png"):
    """Draw a radar (polar) chart comparing every model's scores and save it.

    Args:
        scores_dict: Mapping of model name -> mapping of category name ->
            numeric score. The chart axes are the categories of the first
            model; every other model must provide a score for each of them.
        out_path: Destination path handed to ``Figure.savefig``.

    Raises:
        ValueError: If ``scores_dict`` is empty (there is nothing to plot).
        KeyError: If a model has no score for one of the chart categories.
    """
    import math

    if not scores_dict:
        raise ValueError("scores_dict must contain at least one model")

    # The first model defines the axes; materialise the keys as a list so the
    # same ordered sequence can be reused for lookups and for tick labels.
    categories = list(next(iter(scores_dict.values())))
    axis_count = len(categories)

    # One evenly spaced angle per category. This is loop-invariant, so it is
    # computed once; the closed variant repeats the first angle at the end.
    angles = [2 * math.pi * i / axis_count for i in range(axis_count)]
    closed_angles = angles + angles[:1]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    try:
        for model, scores in scores_dict.items():
            # Look scores up by category so that a model whose dict has a
            # different key order is still drawn on the correct axes.
            values = [scores[category] for category in categories]
            values += values[:1]  # close the radar polygon
            ax.plot(closed_angles, values, linewidth=1, linestyle='solid', label=model)
            ax.fill(closed_angles, values, alpha=0.1)

        ax.set_xticks(angles)
        ax.set_xticklabels(categories)
        ax.set_title("GRACE 评估雷达图 (CPU模式)", size=12, y=1.1)
        ax.legend(loc='upper right')
        fig.savefig(out_path, bbox_inches='tight')
    finally:
        # Close this specific figure even if plotting or saving failed, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)
|