Create grace_eval.py
Browse files- grace_eval.py +15 -0
grace_eval.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from evaluate.visualization import radar_plot
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
|
4 |
+
def compute_sample_scores(results, prompt):
    """Return placeholder per-model scores on the G/R/A/E axes.

    The values are hand-assigned example ratings, not computed metrics.
    In practice this could be extended to average objective metrics over
    multiple prompts.

    Args:
        results: Currently unused; kept so callers can pass generation
            results once real scoring is implemented.
        prompt: Currently unused; kept for the same reason.

    Returns:
        A new dict mapping each model name to a dict with the keys
        "G", "R", "A" and "E" and integer scores as values.
    """
    # Example manual scores; a fresh dict is built on every call so callers
    # may mutate the result without affecting later calls.
    return {
        "sd_v1_5": {"G": 4, "R": 4, "A": 4, "E": 3},
        "openjourney_v4": {"G": 3, "R": 4, "A": 5, "E": 3},
        "ldm_256": {"G": 2, "R": 3, "A": 3, "E": 5},
    }
|
11 |
+
|
12 |
+
def plot_radar(scores_dict, out_path="radar.png"):
    """Draw a radar chart comparing models and save it to ``out_path``.

    NOTE(review): this assumes ``radar_plot`` is the Hugging Face
    ``evaluate.visualization.radar_plot``, which takes ``data`` as a list of
    per-model metric dicts plus a parallel ``model_names`` list, has no
    ``title`` keyword, and returns the matplotlib figure. Confirm against
    the installed package.

    Args:
        scores_dict: Mapping of model name to a dict of axis name -> score.
        out_path: File path the figure is written to.
    """
    model_names = list(scores_dict.keys())
    # One metric dict per model, in the same order as model_names.
    per_model_scores = list(scores_dict.values())

    fig = radar_plot(data=per_model_scores, model_names=model_names)
    if fig is None:
        # Fall back to the current figure if the helper drew implicitly.
        fig = plt.gcf()

    # The title is set on the figure because radar_plot has no title option.
    fig.suptitle("GRACE Radar Comparison")
    fig.savefig(out_path)
    # Close this specific figure so repeated calls do not accumulate figures.
    plt.close(fig)
|