Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -136,9 +136,9 @@ with gr.Blocks(css="""
|
|
136 |
h1 {
|
137 |
text-align: center;
|
138 |
}
|
139 |
-
.section-
|
140 |
-
|
141 |
-
margin
|
142 |
}
|
143 |
.config-box {
|
144 |
border: 1px solid #ddd;
|
@@ -158,7 +158,7 @@ with gr.Blocks(css="""
|
|
158 |
""")
|
159 |
|
160 |
# Dataset Selection Section
|
161 |
-
gr.Markdown("## (A) Select Dataset for Evaluation"
|
162 |
|
163 |
with gr.Row():
|
164 |
dataset_dropdown = gr.Dropdown(
|
@@ -177,20 +177,15 @@ with gr.Blocks(css="""
|
|
177 |
wrap=True,
|
178 |
elem_id="preview_table"
|
179 |
)
|
180 |
-
# Add
|
181 |
-
gr.Markdown("
|
182 |
-
gr.Markdown(" ")
|
183 |
|
184 |
-
#
|
185 |
-
gr.
|
186 |
-
gr.Markdown(" ", elem_classes=["section-spacing"])
|
187 |
|
188 |
# MMLU Config Container - Initially hidden until dataset is selected
|
189 |
with gr.Column(visible=False) as mmlu_config_container:
|
190 |
-
gr.Markdown("## (B) Select Dataset Configuration Options"
|
191 |
-
|
192 |
-
# Add more spacing
|
193 |
-
gr.Markdown(" ")
|
194 |
|
195 |
with gr.Row():
|
196 |
# Left column for subject selection
|
@@ -208,27 +203,18 @@ with gr.Blocks(css="""
|
|
208 |
with gr.Column(visible=False) as num_subjects_container:
|
209 |
num_subjects_slider = gr.Slider(
|
210 |
minimum=1,
|
211 |
-
maximum=14,
|
212 |
value=14,
|
213 |
step=1,
|
214 |
label="Number of Subjects",
|
215 |
-
info="Number of subjects to evaluate
|
216 |
)
|
217 |
|
218 |
# Subject checkboxes - initially hidden, shown when "Specify which Subjects to Evaluate" is selected
|
219 |
with gr.Column(visible=False) as specific_subjects_container:
|
220 |
-
#
|
221 |
-
# The actual subjects will come from the dataset preview
|
222 |
specific_subjects = gr.CheckboxGroup(
|
223 |
-
choices=[
|
224 |
-
"Biology (n=717)",
|
225 |
-
"Chemistry (n=500)",
|
226 |
-
"Physics (n=650)",
|
227 |
-
"Mathematics (n=800)",
|
228 |
-
"Computer Science (n=450)",
|
229 |
-
"History (n=300)",
|
230 |
-
"Literature (n=250)"
|
231 |
-
],
|
232 |
label="Select Specific Subjects",
|
233 |
info="Select which specific subjects to evaluate"
|
234 |
)
|
@@ -247,7 +233,7 @@ with gr.Blocks(css="""
|
|
247 |
info="Number of examples to use for few-shot learning (0-5)."
|
248 |
)
|
249 |
|
250 |
-
# Add
|
251 |
gr.Markdown(" ")
|
252 |
|
253 |
with gr.Row():
|
@@ -296,6 +282,39 @@ with gr.Blocks(css="""
|
|
296 |
# Track preview visibility state
|
297 |
preview_visibility = gr.State(False)
|
298 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
299 |
# Function to show/hide configuration based on selected dataset
|
300 |
def update_interface_based_on_dataset(dataset, current_visibility):
|
301 |
if dataset == "MMLU-Pro":
|
@@ -317,24 +336,27 @@ with gr.Blocks(css="""
|
|
317 |
gr.update(value="Show Dataset Preview") # Reset button text
|
318 |
)
|
319 |
|
320 |
-
# Connect dataset dropdown to show/hide appropriate configuration
|
321 |
dataset_dropdown.change(
|
|
|
|
|
|
|
|
|
322 |
fn=update_interface_based_on_dataset,
|
323 |
inputs=[dataset_dropdown, preview_visibility],
|
324 |
outputs=[mmlu_config_container, results_container, preview_toggle, dataset_preview_container, preview_visibility, preview_toggle]
|
325 |
)
|
326 |
|
327 |
# Function to toggle dataset preview visibility
|
328 |
-
def toggle_preview(dataset, preview_visibility):
|
329 |
# Toggle the visibility state
|
330 |
is_visible = not preview_visibility
|
331 |
|
332 |
# Update button text based on new state
|
333 |
button_text = "Hide Dataset Preview" if is_visible else "Show Dataset Preview"
|
334 |
|
335 |
-
#
|
336 |
if is_visible and dataset == "MMLU-Pro":
|
337 |
-
preview_data = mmlupro_dataset_preview(regenerate_preview=False) # Change regenerate_preview=True if you want to regenerate the preview.
|
338 |
formatted_preview = format_preview_for_display(preview_data)
|
339 |
return is_visible, gr.update(visible=True), formatted_preview, gr.update(value=button_text)
|
340 |
elif is_visible:
|
@@ -347,7 +369,7 @@ with gr.Blocks(css="""
|
|
347 |
# Connect preview toggle to show/hide dataset information
|
348 |
preview_toggle.click(
|
349 |
fn=toggle_preview,
|
350 |
-
inputs=[dataset_dropdown, preview_visibility],
|
351 |
outputs=[preview_visibility, dataset_preview_container, preview_output, preview_toggle]
|
352 |
)
|
353 |
|
|
|
136 |
h1 {
|
137 |
text-align: center;
|
138 |
}
|
139 |
+
.section-divider {
|
140 |
+
border-top: 1px solid #ddd;
|
141 |
+
margin: 12px 0;
|
142 |
}
|
143 |
.config-box {
|
144 |
border: 1px solid #ddd;
|
|
|
158 |
""")
|
159 |
|
160 |
# Dataset Selection Section
|
161 |
+
gr.Markdown("## (A) Select Dataset for Evaluation")
|
162 |
|
163 |
with gr.Row():
|
164 |
dataset_dropdown = gr.Dropdown(
|
|
|
177 |
wrap=True,
|
178 |
elem_id="preview_table"
|
179 |
)
|
180 |
+
# Add a divider instead of lots of space
|
181 |
+
gr.Markdown("<div class='section-divider'></div>")
|
|
|
182 |
|
183 |
+
# Preview data state to store the loaded preview data
|
184 |
+
preview_data_state = gr.State(None)
|
|
|
185 |
|
186 |
# MMLU Config Container - Initially hidden until dataset is selected
|
187 |
with gr.Column(visible=False) as mmlu_config_container:
|
188 |
+
gr.Markdown("## (B) Select Dataset Configuration Options")
|
|
|
|
|
|
|
189 |
|
190 |
with gr.Row():
|
191 |
# Left column for subject selection
|
|
|
203 |
with gr.Column(visible=False) as num_subjects_container:
|
204 |
num_subjects_slider = gr.Slider(
|
205 |
minimum=1,
|
206 |
+
maximum=14, # Will be updated dynamically based on preview data
|
207 |
value=14,
|
208 |
step=1,
|
209 |
label="Number of Subjects",
|
210 |
+
info="Number of subjects to evaluate. They will be loaded in alphabetical order."
|
211 |
)
|
212 |
|
213 |
# Subject checkboxes - initially hidden, shown when "Specify which Subjects to Evaluate" is selected
|
214 |
with gr.Column(visible=False) as specific_subjects_container:
|
215 |
+
# Will be populated dynamically from the preview data
|
|
|
216 |
specific_subjects = gr.CheckboxGroup(
|
217 |
+
choices=[], # Will be populated from preview data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
label="Select Specific Subjects",
|
219 |
info="Select which specific subjects to evaluate"
|
220 |
)
|
|
|
233 |
info="Number of examples to use for few-shot learning (0-5)."
|
234 |
)
|
235 |
|
236 |
+
# Add a small space
|
237 |
gr.Markdown(" ")
|
238 |
|
239 |
with gr.Row():
|
|
|
282 |
# Track preview visibility state
|
283 |
preview_visibility = gr.State(False)
|
284 |
|
285 |
+
# Function to process dataset preview data
|
286 |
+
def get_subject_choices_from_preview(preview_data):
    """Build the subject checkbox labels from dataset preview data.

    Args:
        preview_data: Mapping that may contain a 'subject_counts' entry
            (subject name -> count). May be None or empty.

    Returns:
        A tuple ``(labels, count)``. ``labels`` holds one
        "Subject (n=count)" string per subject, ordered alphabetically by
        subject name; ``count`` is the number of subjects. Returns
        ``([], 0)`` when no subject counts are available.
    """
    if not preview_data or 'subject_counts' not in preview_data:
        return [], 0

    subject_counts = preview_data['subject_counts']
    # A present-but-null (or empty) counts entry also means "no subjects";
    # without this guard a None value would raise on iteration.
    if not subject_counts:
        return [], 0

    # Iterating a mapping yields its keys, so sorted() orders subject names.
    formatted_subjects = [
        f"{subject} (n={subject_counts[subject]})"
        for subject in sorted(subject_counts)
    ]
    return formatted_subjects, len(formatted_subjects)
|
300 |
+
|
301 |
+
# Function to load preview data and update UI
|
302 |
+
def load_dataset_preview(dataset):
    """Load preview data for the selected dataset and refresh subject controls.

    Args:
        dataset: Name of the dataset chosen in the dropdown.

    Returns:
        A 3-tuple ``(preview_data, checkbox_update, slider_update)``:
        the loaded preview data (or None for any dataset other than
        "MMLU-Pro"), an update for the subject checkbox group, and an
        update for the number-of-subjects slider.
    """
    if dataset != "MMLU-Pro":
        # Nothing to load; leave both subject controls untouched.
        return None, gr.update(), gr.update()

    preview_data = mmlupro_dataset_preview(regenerate_preview=False)
    subject_choices, subject_count = get_subject_choices_from_preview(preview_data)

    if subject_count < 1:
        # The slider is created with minimum=1; pushing maximum/value to 0
        # would put it in an invalid range, so keep its current settings.
        return preview_data, gr.update(choices=subject_choices), gr.update()

    return (
        preview_data,  # Stored for later use by the preview toggle
        gr.update(choices=subject_choices),  # Checkbox choices
        # Cap the selected value at 14 while raising/lowering the maximum
        # to the number of subjects actually present.
        gr.update(maximum=subject_count, value=min(subject_count, 14)),
    )
|
317 |
+
|
318 |
# Function to show/hide configuration based on selected dataset
|
319 |
def update_interface_based_on_dataset(dataset, current_visibility):
|
320 |
if dataset == "MMLU-Pro":
|
|
|
336 |
gr.update(value="Show Dataset Preview") # Reset button text
|
337 |
)
|
338 |
|
339 |
+
# Connect dataset dropdown to show/hide appropriate configuration and load preview data
|
340 |
dataset_dropdown.change(
|
341 |
+
fn=load_dataset_preview,
|
342 |
+
inputs=[dataset_dropdown],
|
343 |
+
outputs=[preview_data_state, specific_subjects, num_subjects_slider],
|
344 |
+
).then(
|
345 |
fn=update_interface_based_on_dataset,
|
346 |
inputs=[dataset_dropdown, preview_visibility],
|
347 |
outputs=[mmlu_config_container, results_container, preview_toggle, dataset_preview_container, preview_visibility, preview_toggle]
|
348 |
)
|
349 |
|
350 |
# Function to toggle dataset preview visibility
|
351 |
+
def toggle_preview(dataset, preview_visibility, preview_data):
|
352 |
# Toggle the visibility state
|
353 |
is_visible = not preview_visibility
|
354 |
|
355 |
# Update button text based on new state
|
356 |
button_text = "Hide Dataset Preview" if is_visible else "Show Dataset Preview"
|
357 |
|
358 |
+
# Format and show preview if becoming visible
|
359 |
if is_visible and dataset == "MMLU-Pro":
|
|
|
360 |
formatted_preview = format_preview_for_display(preview_data)
|
361 |
return is_visible, gr.update(visible=True), formatted_preview, gr.update(value=button_text)
|
362 |
elif is_visible:
|
|
|
369 |
# Connect preview toggle to show/hide dataset information
|
370 |
preview_toggle.click(
|
371 |
fn=toggle_preview,
|
372 |
+
inputs=[dataset_dropdown, preview_visibility, preview_data_state],
|
373 |
outputs=[preview_visibility, dataset_preview_container, preview_output, preview_toggle]
|
374 |
)
|
375 |
|