big_ai / grace_eval.py
linimi's picture
Create grace_eval.py
03b8479 verified
raw
history blame
583 Bytes
from evaluate.visualization import radar_plot
import matplotlib.pyplot as plt
def compute_sample_scores(results, prompt):
    """Return example GRACE scores for each compared model.

    The values are hard-coded sample (manually assigned) ratings; the
    function does not currently read ``results`` or ``prompt``. They are
    kept in the signature so callers do not need to change if this is
    later extended to average objective metrics over multiple prompts.

    Args:
        results: Unused placeholder for per-model generation results.
        prompt: Unused placeholder for the prompt being evaluated.

    Returns:
        A new dict mapping model name -> dict of the four score axes
        ``"G"``, ``"R"``, ``"A"`` and ``"E"`` -> integer score.
    """
    # Sample manual scores (in practice this can be extended to average
    # objective metrics across multiple prompts).
    return {
        "sd_v1_5": {"G": 4, "R": 4, "A": 4, "E": 3},
        "openjourney_v4": {"G": 3, "R": 4, "A": 5, "E": 3},
        "ldm_256": {"G": 2, "R": 3, "A": 3, "E": 5},
    }
def plot_radar(scores_dict, out_path="radar.png"):
    """Draw a radar chart comparing models and save it as an image.

    Args:
        scores_dict: Mapping of model name -> mapping of metric name ->
            score, e.g. ``{"sd_v1_5": {"G": 4, "R": 4, "A": 4, "E": 3}}``.
        out_path: Path of the image file to write.

    Raises:
        ValueError: If ``scores_dict`` is empty.
    """
    if not scores_dict:
        raise ValueError("scores_dict must contain at least one model")

    model_names = list(scores_dict)
    # radar_plot takes one metric dict per model, in the same order as
    # model_names; handing it the nested dict directly would put metrics on
    # the rows and no longer line up with the model names.
    fig = radar_plot(
        data=[scores_dict[name] for name in model_names],
        model_names=model_names,
    )
    # radar_plot accepts no title argument, so set the title on the figure
    # it returns.
    fig.suptitle("GRACE Radar Comparison")
    try:
        fig.savefig(out_path)
    finally:
        # Close this specific figure even if saving fails, so it is not
        # left registered with pyplot.
        plt.close(fig)