rohansampath committed on
Commit
2010e21
·
verified ·
1 Parent(s): eb7de2f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -47
app.py CHANGED
@@ -87,11 +87,16 @@ def run_mmlu_evaluation(all_subjects, num_subjects, num_shots, all_questions, nu
87
  )
88
 
89
  # Return values that re-enable UI components after completion
90
- return (report, results_df,
91
- gr.update(interactive=True), gr.update(visible=False),
92
- gr.update(interactive=True), gr.update(interactive=True),
93
- gr.update(interactive=True), gr.update(interactive=True),
94
- gr.update(interactive=True))
 
 
 
 
 
95
 
96
  except Exception as e:
97
  # Handle errors gracefully
@@ -99,11 +104,48 @@ def run_mmlu_evaluation(all_subjects, num_subjects, num_shots, all_questions, nu
99
  error_message = f"### Error during evaluation\n```\n{error_trace}\n```"
100
 
101
  # Re-enable UI components on error
102
- return (error_message, None,
103
- gr.update(interactive=True), gr.update(visible=False),
104
- gr.update(interactive=True), gr.update(interactive=True),
105
- gr.update(interactive=True), gr.update(interactive=True),
106
- gr.update(interactive=True))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  # ---------------------------------------------------------------------------
109
  # 3. Gradio Interface
@@ -115,27 +157,32 @@ with gr.Blocks() as demo:
115
  """)
116
 
117
  # Dataset Selection Section
118
- gr.Markdown("### (A) Select Dataset for evaluation")
119
 
120
  with gr.Row():
121
  dataset_dropdown = gr.Dropdown(
122
- choices=["MMLU-Pro"],
123
- value=None,
124
  label="Dataset",
125
  info="Select a dataset to evaluate the model on"
126
  )
127
- preview_button = gr.Button("Show Preview", interactive=False)
128
 
129
  # Dataset Preview Container - Initially hidden
130
- with gr.Group(visible=False) as dataset_preview_container:
 
131
  preview_output = gr.DataFrame(
132
- label="Dataset Preview",
133
- interactive=False
 
134
  )
 
 
 
135
 
136
  # MMLU Config Container - Initially hidden until dataset is selected
137
- with gr.Group(visible=False) as mmlu_config_container:
138
- gr.Markdown("### MMLU-Pro Evaluation Configuration")
139
 
140
  with gr.Row():
141
  all_subjects_checkbox = gr.Checkbox(
@@ -191,52 +238,70 @@ with gr.Blocks() as demo:
191
  cancel_mmlu_button = gr.Button("Cancel Evaluation", variant="stop", visible=False)
192
 
193
  # Results Section - Initially hidden
194
- with gr.Group(visible=False) as results_container:
195
  results_output = gr.Markdown(label="Evaluation Results")
196
 
197
- with gr.Row():
198
- results_table = gr.DataFrame(interactive=True, label="Detailed Results (Sortable)", visible=True)
 
 
 
 
 
 
199
 
200
- # Track evaluation state - used to prevent multiple evaluations
201
  evaluation_state = gr.State({"running": False})
 
202
 
203
- # Function to show configuration based on selected dataset
204
  def update_interface_based_on_dataset(dataset):
205
  if dataset == "MMLU-Pro":
206
  return (
207
  gr.update(visible=True), # mmlu_config_container
208
  gr.update(visible=True), # results_container
209
- gr.update(interactive=True) # preview_button
210
  )
211
  else:
212
  return (
213
  gr.update(visible=False), # mmlu_config_container
214
  gr.update(visible=False), # results_container
215
- gr.update(interactive=False) # preview_button
216
  )
217
 
218
  # Connect dataset dropdown to show/hide appropriate configuration
219
  dataset_dropdown.change(
220
  fn=update_interface_based_on_dataset,
221
  inputs=[dataset_dropdown],
222
- outputs=[mmlu_config_container, results_container, preview_button]
223
  )
224
 
225
- # Function to show dataset preview
226
- def show_dataset_preview(dataset):
227
- if dataset == "MMLU-Pro":
 
 
 
 
 
228
  preview_data = mmlupro_dataset_preview()
229
- formatted_preview = format_preview_for_display(preview_data)
230
- return gr.update(visible=True), formatted_preview
231
- else:
 
232
  # For other datasets (not implemented yet)
233
- return gr.update(visible=False), None
 
 
 
 
 
234
 
235
- # Connect preview button to show dataset information
236
- preview_button.click(
237
- fn=show_dataset_preview,
238
- inputs=[dataset_dropdown],
239
- outputs=[dataset_preview_container, preview_output]
240
  )
241
 
242
  # Update num_subjects_slider interactivity based on all_subjects checkbox
@@ -273,9 +338,10 @@ with gr.Blocks() as demo:
273
  gr.update(interactive=False),
274
  gr.update(interactive=False),
275
  gr.update(interactive=False),
276
- gr.update(visible=False),
277
  "Evaluation already in progress. Please wait.",
278
- None
 
279
  ]
280
 
281
  # Update state to running
@@ -291,7 +357,8 @@ with gr.Blocks() as demo:
291
  gr.update(interactive=False), # eval_mmlu_button
292
  gr.update(visible=True), # cancel_mmlu_button
293
  "Starting evaluation...", # results_output
294
- None # results_table
 
295
  ]
296
 
297
  # Function to reset UI after evaluation
@@ -314,7 +381,8 @@ with gr.Blocks() as demo:
314
  gr.update(interactive=True), # eval_mmlu_button
315
  gr.update(visible=False), # cancel_mmlu_button
316
  "⚠️ Evaluation canceled by user (note: backend process may continue running)", # results_output
317
- None # results_table
 
318
  ]
319
 
320
  # Connect MMLU evaluation button with state tracking
@@ -331,7 +399,8 @@ with gr.Blocks() as demo:
331
  eval_mmlu_button,
332
  cancel_mmlu_button,
333
  results_output,
334
- results_table
 
335
  ]
336
  ).then(
337
  fn=run_mmlu_evaluation,
@@ -351,7 +420,8 @@ with gr.Blocks() as demo:
351
  num_subjects_slider,
352
  num_shots_slider,
353
  all_questions_checkbox,
354
- num_questions_slider
 
355
  ]
356
  ).then(
357
  fn=finish_evaluation,
@@ -373,8 +443,22 @@ with gr.Blocks() as demo:
373
  eval_mmlu_button,
374
  cancel_mmlu_button,
375
  results_output,
376
- results_table
 
377
  ]
378
  )
379
 
380
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  )
88
 
89
  # Return values that re-enable UI components after completion
90
+ return (report,
91
+ results_df,
92
+ gr.update(interactive=True),
93
+ gr.update(visible=False),
94
+ gr.update(interactive=True),
95
+ gr.update(interactive=True),
96
+ gr.update(interactive=True),
97
+ gr.update(interactive=True),
98
+ gr.update(interactive=True),
99
+ gr.update(visible=True))
100
 
101
  except Exception as e:
102
  # Handle errors gracefully
 
104
  error_message = f"### Error during evaluation\n```\n{error_trace}\n```"
105
 
106
  # Re-enable UI components on error
107
+ return (error_message,
108
+ None,
109
+ gr.update(interactive=True),
110
+ gr.update(visible=False),
111
+ gr.update(interactive=True),
112
+ gr.update(interactive=True),
113
+ gr.update(interactive=True),
114
+ gr.update(interactive=True),
115
+ gr.update(interactive=True),
116
+ gr.update(visible=False))
117
+
118
def format_links_with_bullets(links_text):
    """Prefix every non-blank line of *links_text* with a bullet point.

    Args:
        links_text: Newline-separated text, one entry per line.

    Returns:
        The non-blank lines joined with newlines, each prefixed with "• ".
        Blank or whitespace-only lines are dropped, so empty input yields an
        empty string instead of a lone bullet.
    """
    # splitlines() also copes with "\r\n" endings and a trailing newline,
    # which split('\n') would turn into an empty bulleted entry.
    entries = [line for line in links_text.splitlines() if line.strip()]
    return "\n".join(f"• {entry}" for entry in entries)
122
+
123
+ # Function to format dataset preview for better display
124
def enhanced_format_preview_for_display(preview_data):
    """Turn a dataset preview mapping into a two-column table.

    Args:
        preview_data: Mapping read with the keys "links" (itself holding
            "huggingface", "github" and "paper"), "dataset_name",
            "evaluation_type", "description", "organization",
            "num_questions", "input_tokens" and "evaluation_time"
            (holding "total_time_minutes").

    Returns:
        A pandas DataFrame with the columns "Dataset Property" and "Details",
        one row per displayed property.
    """
    # Render the three links as a bulleted, newline-separated cell.
    link_urls = preview_data["links"]
    link_lines = "\n".join(
        (
            f"Hugging Face: {link_urls['huggingface']}",
            f"GitHub: {link_urls['github']}",
            f"Paper: {link_urls['paper']}",
        )
    )
    links_formatted = format_links_with_bullets(link_lines)

    # (label, value) pairs in display order.
    properties = [
        ("Dataset Name", preview_data["dataset_name"]),
        ("Evaluation Type", preview_data["evaluation_type"]),
        ("Description", preview_data["description"]),
        ("Links", links_formatted),
        ("Organization", preview_data["organization"]),
        ("Number of Questions", preview_data["num_questions"]),
        ("Number of Input Tokens", preview_data["input_tokens"]),
        (
            "Estimated Evaluation Time",
            f"{preview_data['evaluation_time']['total_time_minutes']} minutes (for 2 models on A100)",
        ),
    ]

    return pd.DataFrame(
        [{"Dataset Property": label, "Details": value} for label, value in properties]
    )
149
 
150
  # ---------------------------------------------------------------------------
151
  # 3. Gradio Interface
 
157
  """)
158
 
159
  # Dataset Selection Section
160
+ gr.Markdown("## (A) Select Dataset for evaluation")
161
 
162
  with gr.Row():
163
  dataset_dropdown = gr.Dropdown(
164
+ choices=["(Select Dataset)", "MMLU-Pro"],
165
+ value="(Select Dataset)",
166
  label="Dataset",
167
  info="Select a dataset to evaluate the model on"
168
  )
169
+ preview_toggle = gr.Button("Show Preview", interactive=False, variant="secondary")
170
 
171
  # Dataset Preview Container - Initially hidden
172
+ with gr.Column(visible=False) as dataset_preview_container:
173
+ gr.Markdown("## Dataset Preview", elem_id="preview_header")
174
  preview_output = gr.DataFrame(
175
+ interactive=False,
176
+ wrap=True,
177
+ elem_id="preview_table"
178
  )
179
+ # Add vertical space after the preview
180
+ gr.Markdown(" ")
181
+ gr.Markdown(" ")
182
 
183
  # MMLU Config Container - Initially hidden until dataset is selected
184
+ with gr.Column(visible=False) as mmlu_config_container:
185
+ gr.Markdown("## (B) Select Dataset Configuration Options")
186
 
187
  with gr.Row():
188
  all_subjects_checkbox = gr.Checkbox(
 
238
  cancel_mmlu_button = gr.Button("Cancel Evaluation", variant="stop", visible=False)
239
 
240
  # Results Section - Initially hidden
241
+ with gr.Column(visible=False) as results_container:
242
  results_output = gr.Markdown(label="Evaluation Results")
243
 
244
+ # Results table - Initially hidden until evaluation completes
245
+ with gr.Column(visible=False) as results_table_container:
246
+ with gr.Row():
247
+ results_table = gr.DataFrame(
248
+ interactive=True,
249
+ label="Detailed Results (Sortable)",
250
+ visible=True
251
+ )
252
 
253
+ # Track evaluation state and preview state
254
  evaluation_state = gr.State({"running": False})
255
+ preview_state = gr.State({"visible": False})
256
 
257
+ # Function to show/hide configuration based on selected dataset
258
def update_interface_based_on_dataset(dataset):
    """Show or hide the dataset-specific UI for the selected dataset.

    Returns three Gradio updates, in the order the dropdown's change event
    lists its outputs: mmlu_config_container, results_container,
    preview_toggle. All three are enabled only when "MMLU-Pro" is selected.
    """
    is_mmlu_pro = dataset == "MMLU-Pro"
    return (
        gr.update(visible=is_mmlu_pro),      # mmlu_config_container
        gr.update(visible=is_mmlu_pro),      # results_container
        gr.update(interactive=is_mmlu_pro),  # preview_toggle
    )
271
 
272
  # Connect dataset dropdown to show/hide appropriate configuration
273
  dataset_dropdown.change(
274
  fn=update_interface_based_on_dataset,
275
  inputs=[dataset_dropdown],
276
+ outputs=[mmlu_config_container, results_container, preview_toggle]
277
  )
278
 
279
+ # Function to toggle dataset preview visibility
280
def toggle_preview(state, dataset):
    """Flip the dataset preview between shown and hidden.

    Args:
        state: Dict carrying a boolean "visible" flag; it is updated in place
            and returned.
        dataset: Currently selected dataset name.

    Returns:
        A 4-tuple: the updated state, a visibility update for the preview
        container, the preview table (or None), and an update that relabels
        the toggle button.
    """
    now_visible = not state["visible"]
    state["visible"] = now_visible

    # Hiding: clear the table and restore the "Show" label.
    if not now_visible:
        return state, gr.update(visible=False), None, gr.update(value="Show Preview")

    # Showing: only MMLU-Pro has preview data; other datasets show an empty table.
    formatted_preview = None
    if dataset == "MMLU-Pro":
        formatted_preview = enhanced_format_preview_for_display(mmlupro_dataset_preview())

    return state, gr.update(visible=True), formatted_preview, gr.update(value="Hide Preview")
299
 
300
+ # Connect preview toggle to show/hide dataset information
301
+ preview_toggle.click(
302
+ fn=toggle_preview,
303
+ inputs=[preview_state, dataset_dropdown],
304
+ outputs=[preview_state, dataset_preview_container, preview_output, preview_toggle]
305
  )
306
 
307
  # Update num_subjects_slider interactivity based on all_subjects checkbox
 
338
  gr.update(interactive=False),
339
  gr.update(interactive=False),
340
  gr.update(interactive=False),
341
+ gr.update(visible=True),
342
  "Evaluation already in progress. Please wait.",
343
+ None,
344
+ gr.update(visible=False)
345
  ]
346
 
347
  # Update state to running
 
357
  gr.update(interactive=False), # eval_mmlu_button
358
  gr.update(visible=True), # cancel_mmlu_button
359
  "Starting evaluation...", # results_output
360
+ None, # results_table
361
+ gr.update(visible=False) # results_table_container
362
  ]
363
 
364
  # Function to reset UI after evaluation
 
381
  gr.update(interactive=True), # eval_mmlu_button
382
  gr.update(visible=False), # cancel_mmlu_button
383
  "⚠️ Evaluation canceled by user (note: backend process may continue running)", # results_output
384
+ None, # results_table
385
+ gr.update(visible=False) # results_table_container
386
  ]
387
 
388
  # Connect MMLU evaluation button with state tracking
 
399
  eval_mmlu_button,
400
  cancel_mmlu_button,
401
  results_output,
402
+ results_table,
403
+ results_table_container
404
  ]
405
  ).then(
406
  fn=run_mmlu_evaluation,
 
420
  num_subjects_slider,
421
  num_shots_slider,
422
  all_questions_checkbox,
423
+ num_questions_slider,
424
+ results_table_container
425
  ]
426
  ).then(
427
  fn=finish_evaluation,
 
443
  eval_mmlu_button,
444
  cancel_mmlu_button,
445
  results_output,
446
+ results_table,
447
+ results_table_container
448
  ]
449
  )
450
 
451
+ # Add custom CSS for styling
452
+ css = """
453
+ #preview_header {
454
+ margin-bottom: 10px;
455
+ margin-top: 5px;
456
+ }
457
+ #preview_table {
458
+ background-color: #f8f9fa;
459
+ border-radius: 8px;
460
+ padding: 10px;
461
+ }
462
+ """
463
+
464
+ demo.launch(css=css)