from evaluate.visualization import radar_plot
import matplotlib.pyplot as plt
def compute_sample_scores(results, prompt):
    """Return example, manually assigned scores for each model.

    The values are hard-coded placeholders. In practice this could be
    extended to average objective metrics over multiple prompts.

    Args:
        results: Currently unused; kept so callers can pass generation
            results once real scoring is implemented.
        prompt: Currently unused; kept for the same reason.

    Returns:
        A new dict on every call, mapping each model name to a dict of
        integer scores keyed by "G", "R", "A" and "E".
    """
    return {
        "sd_v1_5": {"G": 4, "R": 4, "A": 4, "E": 3},
        "openjourney_v4": {"G": 3, "R": 4, "A": 5, "E": 3},
        "ldm_256": {"G": 2, "R": 3, "A": 3, "E": 5},
    }
def plot_radar(scores_dict, out_path="radar.png"):
    """Draw a radar chart for the models in *scores_dict* and save it.

    Args:
        scores_dict: Mapping whose keys are used as the model names; it is
            passed unchanged to ``radar_plot`` as the data argument.
        out_path: File path the current matplotlib figure is saved to.
    """
    # NOTE(review): if ``evaluate`` is the Hugging Face package, its
    # ``radar_plot`` expects a list of per-model metric dicts and appears to
    # have no ``title`` keyword -- confirm against the implementation that is
    # actually imported before relying on this call.
    radar_plot(
        scores_dict,
        model_names=list(scores_dict),
        title="GRACE Radar Comparison",
    )
    try:
        plt.savefig(out_path)
    finally:
        # Close the figure even when saving fails so it is not leaked.
        plt.close()