Quazim0t0 committed on
Commit
98ee42b
·
verified ·
1 Parent(s): 968f2d4

Update leaderboard.py

Browse files
Files changed (1) hide show
  1. leaderboard.py +241 -1
leaderboard.py CHANGED
@@ -153,4 +153,244 @@ class Leaderboard:
153
  xaxis_title="Tag",
154
  yaxis_title="Count"
155
  )
156
- return fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  xaxis_title="Tag",
154
  yaxis_title="Count"
155
  )
156
+ return fig
157
+
158
+ # Count models by tag
159
+ tag_counts = df['tag'].value_counts().reset_index()
160
+ tag_counts.columns = ['Tag', 'Count']
161
+
162
+ # Create pie chart
163
+ fig = px.pie(
164
+ tag_counts,
165
+ names='Tag',
166
+ values='Count',
167
+ title='Model Distribution by Tag',
168
+ color='Tag',
169
+ color_discrete_map=self.tag_colors
170
+ )
171
+
172
+ # Customize layout
173
+ fig.update_layout(
174
+ font=dict(size=12)
175
+ )
176
+
177
+ return fig
178
+
179
def create_benchmark_comparison_chart(self, df):
    """Build a bar chart of the mean score for each benchmark.

    Args:
        df: Leaderboard DataFrame. Its ``benchmark_name`` and ``score``
            columns are read when it is not empty.

    Returns:
        plotly.graph_objects.Figure: One bar per benchmark showing the
        mean of ``score``, or a placeholder figure titled
        "No data available" when ``df.empty`` is true.
    """
    # Both the placeholder and the real chart use the same axis titles.
    axis_labels = dict(xaxis_title="Benchmark", yaxis_title="Average Score")

    # Guard clause: nothing to aggregate, so return a labelled blank figure.
    if df.empty:
        placeholder = go.Figure()
        placeholder.update_layout(title="No data available", **axis_labels)
        return placeholder

    # Mean score per benchmark, flattened back into a two-column frame
    # whose column names double as the chart's axis/legend labels.
    mean_scores = df.groupby('benchmark_name')['score'].mean()
    per_benchmark = mean_scores.reset_index()
    per_benchmark.columns = ['Benchmark', 'Average Score']

    chart = px.bar(
        per_benchmark,
        x='Benchmark',
        y='Average Score',
        title='Average Performance by Benchmark',
        color='Benchmark',
    )
    chart.update_layout(font=dict(size=12), **axis_labels)
    return chart
219
+
220
+ # Leaderboard UI components
221
def create_leaderboard_ui(leaderboard, db_manager):
    """Create the leaderboard UI components.

    Builds a Gradio Blocks layout with tag/benchmark filters, a refresh
    button, and three mutually exclusive views (table, single chart,
    dashboard), then wires the event handlers that populate them.

    Args:
        leaderboard: Leaderboard instance. This function reads
            ``model_tags`` and calls ``get_leaderboard_data``,
            ``format_leaderboard_for_display``, ``create_performance_chart``,
            ``create_tag_distribution_chart`` and
            ``create_benchmark_comparison_chart`` on it.
        db_manager: Database manager instance. ``get_benchmarks()`` is
            called on it and each returned item is indexed with ``"id"``
            and ``"name"``.

    Returns:
        gr.Blocks: Gradio Blocks component with leaderboard UI
    """
    # Gradio dropdown choices given as tuples are (display label, value).
    # The value is what the event handlers receive, so the sentinel "all"
    # (and, below, the benchmark id) must be the SECOND element.
    all_benchmarks_choice = ("All Benchmarks", "all")

    with gr.Blocks() as leaderboard_ui:
        gr.Markdown("# Dynamic Highscores Leaderboard")

        with gr.Row():
            with gr.Column(scale=1):
                tag_filter = gr.Dropdown(
                    choices=leaderboard.model_tags,
                    value="All",
                    label="Filter by Tag"
                )

                benchmark_filter = gr.Dropdown(
                    choices=[all_benchmarks_choice],
                    value="all",
                    label="Filter by Benchmark"
                )

                refresh_button = gr.Button("Refresh Leaderboard")

            with gr.Column(scale=2):
                chart_type = gr.Radio(
                    choices=["bar", "scatter"],
                    value="bar",
                    label="Chart Type"
                )

                view_type = gr.Radio(
                    choices=["Table", "Chart", "Dashboard"],
                    value="Table",
                    label="View Type"
                )

        # Table view
        leaderboard_table = gr.Dataframe(
            headers=["Model", "Benchmark", "Tag", "Score", "Completed"],
            label="Leaderboard",
            visible=True
        )

        # Chart view
        with gr.Row(visible=False) as chart_view:
            performance_chart = gr.Plot(label="Performance Chart")

        # Dashboard view
        with gr.Row(visible=False) as dashboard_view:
            with gr.Column(scale=2):
                dashboard_performance_chart = gr.Plot(label="Performance Comparison")

            with gr.Column(scale=1):
                with gr.Row():
                    tag_distribution_chart = gr.Plot(label="Model Distribution")

                with gr.Row():
                    benchmark_comparison_chart = gr.Plot(label="Benchmark Comparison")

        # Event handlers
        def refresh_benchmarks():
            """Reload the benchmark dropdown choices from the database."""
            try:
                benchmarks = db_manager.get_benchmarks()

                # Label is the benchmark name; value is its id as a string.
                choices = [all_benchmarks_choice]
                choices.extend((b["name"], str(b["id"])) for b in benchmarks)

                return gr.update(choices=choices)
            except Exception as e:
                # Best effort: keep the UI usable with only the "all" entry.
                print(f"Error refreshing benchmarks: {e}")
                return gr.update(choices=[all_benchmarks_choice])

        def update_leaderboard(tag, benchmark_id, chart_type_val, view_type_val):
            """Recompute the table and all charts for the current filters.

            Returns an 8-tuple matching ``update_outputs`` below: table
            data, the performance chart (twice, once per view), the tag
            and benchmark charts, then visibility updates for the table,
            chart view and dashboard view.
            """
            try:
                # The "all" sentinel means "no benchmark filter".
                if benchmark_id == "all":
                    benchmark_id = None

                df = leaderboard.get_leaderboard_data(tag=tag, benchmark_id=benchmark_id)

                # Format for display
                display_df = leaderboard.format_leaderboard_for_display(df)

                # Create charts
                perf_chart = leaderboard.create_performance_chart(df, chart_type=chart_type_val)
                tag_chart = leaderboard.create_tag_distribution_chart(df)
                benchmark_chart = leaderboard.create_benchmark_comparison_chart(df)

                return (
                    display_df,
                    perf_chart,
                    perf_chart,  # Same chart for both views
                    tag_chart,
                    benchmark_chart,
                    gr.update(visible=view_type_val == "Table"),
                    gr.update(visible=view_type_val == "Chart"),
                    gr.update(visible=view_type_val == "Dashboard")
                )
            except Exception as e:
                # On any failure fall back to an empty table view so the
                # page still renders instead of surfacing a traceback.
                print(f"Error updating leaderboard: {e}")
                empty_df = pd.DataFrame(columns=['Model', 'Benchmark', 'Tag', 'Score', 'Completed'])
                empty_chart = go.Figure()
                empty_chart.update_layout(title="Error loading data")

                return (
                    empty_df,
                    empty_chart,
                    empty_chart,
                    empty_chart,
                    empty_chart,
                    gr.update(visible=True),
                    gr.update(visible=False),
                    gr.update(visible=False)
                )

        # Output slots filled by update_leaderboard, in return order.
        # leaderboard_table appears twice on purpose: first for its data,
        # then for its visibility update.
        update_outputs = [
            leaderboard_table,
            performance_chart,
            dashboard_performance_chart,
            tag_distribution_chart,
            benchmark_comparison_chart,
            leaderboard_table,
            chart_view,
            dashboard_view
        ]

        # Connect event handlers
        refresh_button.click(
            fn=update_leaderboard,
            inputs=[tag_filter, benchmark_filter, chart_type, view_type],
            outputs=update_outputs
        )

        # Switching the view only toggles visibility; data is not reloaded.
        view_type.change(
            fn=lambda view_t: (
                gr.update(visible=view_t == "Table"),
                gr.update(visible=view_t == "Chart"),
                gr.update(visible=view_t == "Dashboard")
            ),
            inputs=[view_type],
            outputs=[leaderboard_table, chart_view, dashboard_view]
        )

        # Initialize on load
        leaderboard_ui.load(
            fn=refresh_benchmarks,
            inputs=[],
            outputs=[benchmark_filter]
        )

        leaderboard_ui.load(
            fn=lambda: update_leaderboard("All", "all", "bar", "Table"),
            inputs=[],
            outputs=update_outputs
        )

    return leaderboard_ui