Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -166,10 +166,12 @@ def process_and_visualize(file_content):
|
|
166 |
|
167 |
output_text = f"Average Performance per Model:\n{averages.sort_values(by='Average Performance').to_string()}"
|
168 |
|
169 |
-
return output_text, image_avg, image_line, image_heatmap, image_boxplot, plotly_avg, plotly_tasks
|
170 |
|
171 |
if __name__ == "__main__":
|
172 |
|
|
|
|
|
173 |
iface = gr.Interface(
|
174 |
fn=process_and_visualize,
|
175 |
inputs=gr.Textbox(lines=10, label="Paste your data here"),
|
@@ -180,8 +182,10 @@ if __name__ == "__main__":
|
|
180 |
gr.Image(label="Matplotlib Task Performance Heatmap"),
|
181 |
gr.Image(label="Matplotlib Performance Distribution Boxplot"),
|
182 |
gr.HTML(label="Plotly Average Performance Chart"),
|
183 |
-
gr.
|
184 |
-
[gr.HTML(label=f"Plotly {task} Chart") for task in
|
|
|
|
|
185 |
],
|
186 |
title="LLM Benchmark Visualizer",
|
187 |
description="Upload your LLM benchmark data and visualize the results."
|
|
|
166 |
|
167 |
output_text = f"Average Performance per Model:\n{averages.sort_values(by='Average Performance').to_string()}"
|
168 |
|
169 |
+
return output_text, image_avg, image_line, image_heatmap, image_boxplot, plotly_avg, plotly_tasks, plotly_tasks
|
170 |
|
171 |
if __name__ == "__main__":
|
172 |
|
173 |
+
task_names = ['tinyArc', 'tinyHellaswag', 'tinyMMLU', 'tinyTruthfulQA', 'tinyTruthfulQA_mc1', 'tinyWinogrande']
|
174 |
+
|
175 |
iface = gr.Interface(
|
176 |
fn=process_and_visualize,
|
177 |
inputs=gr.Textbox(lines=10, label="Paste your data here"),
|
|
|
182 |
gr.Image(label="Matplotlib Task Performance Heatmap"),
|
183 |
gr.Image(label="Matplotlib Performance Distribution Boxplot"),
|
184 |
gr.HTML(label="Plotly Average Performance Chart"),
|
185 |
+
gr.TabbedInterface(
|
186 |
+
[gr.HTML(label=f"Plotly {task} Chart") for task in task_names],
|
187 |
+
label="Task Charts"
|
188 |
+
),
|
189 |
],
|
190 |
title="LLM Benchmark Visualizer",
|
191 |
description="Upload your LLM benchmark data and visualize the results."
|