rohansampath committed on
Commit
ed9a008
·
verified ·
1 Parent(s): 84c9e35

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -14
app.py CHANGED
@@ -61,7 +61,7 @@ def run_toy_evaluation():
61
  # 3. MMLU Evaluation call
62
  # ---------------------------------------------------------------------------
63
  @spaces.GPU(duration=120) # Allow up to 2 minutes for full evaluation
64
- def run_mmlu_evaluation(all_subjects, num_subjects, num_shots, num_examples):
65
  """
66
  Runs the MMLU evaluation with the specified parameters.
67
 
@@ -69,7 +69,8 @@ def run_mmlu_evaluation(all_subjects, num_subjects, num_shots, num_examples):
69
  all_subjects (bool): Whether to evaluate all subjects
70
  num_subjects (int): Number of subjects to evaluate (1-57)
71
  num_shots (int): Number of few-shot examples (0-5)
72
- num_examples (int): Number of examples per subject (1-10 or -1 for all)
 
73
  """
74
 
75
  if not model_loaded:
@@ -81,13 +82,17 @@ def run_mmlu_evaluation(all_subjects, num_subjects, num_shots, num_examples):
81
  # Convert num_subjects to -1 if all_subjects is True
82
  if all_subjects:
83
  num_subjects = -1
 
 
 
 
84
 
85
  # Run evaluation
86
  results = evaluate_mmlu(
87
  model,
88
  tokenizer,
89
  num_subjects=num_subjects,
90
- num_questions=num_examples,
91
  num_shots=num_shots
92
  )
93
 
@@ -138,13 +143,13 @@ with gr.Blocks() as demo:
138
  with gr.Row():
139
  all_subjects_checkbox = gr.Checkbox(
140
  label="Evaluate All Subjects",
141
- value=True,
142
  info="When checked, evaluates all 57 MMLU subjects"
143
  )
144
  num_subjects_slider = gr.Slider(
145
  minimum=1,
146
  maximum=57,
147
- value=57,
148
  step=1,
149
  label="Number of Subjects",
150
  info="Number of subjects to evaluate (1-57). They will be loaded in alphabetical order.",
@@ -155,18 +160,26 @@ with gr.Blocks() as demo:
155
  num_shots_slider = gr.Slider(
156
  minimum=0,
157
  maximum=5,
158
- value=5,
159
  step=1,
160
  label="Number of Few-shot Examples",
161
  info="Number of examples to use for few-shot learning (0-5). They will be loaded in alphabetical order."
162
  )
163
- num_examples_slider = gr.Slider(
 
 
 
 
 
 
 
164
  minimum=1,
165
- maximum=10,
166
- value=5,
167
  step=1,
168
- label="Examples per Subject",
169
- info="Number of test examples per subject (1-10). They will be loaded in alphabetical order."
 
170
  )
171
 
172
  with gr.Row():
@@ -184,12 +197,31 @@ with gr.Blocks() as demo:
184
  )
185
 
186
  # Update num_subjects_slider interactivity based on all_subjects checkbox
 
 
 
 
 
 
187
  all_subjects_checkbox.change(
188
- fn=lambda x: gr.update(interactive=not x),
189
  inputs=[all_subjects_checkbox],
190
  outputs=[num_subjects_slider]
191
  )
192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  # Connect MMLU evaluation button
194
  eval_mmlu_button.click(
195
  fn=run_mmlu_evaluation,
@@ -197,9 +229,10 @@ with gr.Blocks() as demo:
197
  all_subjects_checkbox,
198
  num_subjects_slider,
199
  num_shots_slider,
200
- num_examples_slider
 
201
  ],
202
  outputs=results_output
203
  )
204
 
205
- demo.launch()
 
61
  # 3. MMLU Evaluation call
62
  # ---------------------------------------------------------------------------
63
  @spaces.GPU(duration=120) # Allow up to 2 minutes for full evaluation
64
+ def run_mmlu_evaluation(all_subjects, num_subjects, num_shots, all_questions, num_questions):
65
  """
66
  Runs the MMLU evaluation with the specified parameters.
67
 
 
69
  all_subjects (bool): Whether to evaluate all subjects
70
  num_subjects (int): Number of subjects to evaluate (1-57)
71
  num_shots (int): Number of few-shot examples (0-5)
72
+ all_questions (bool): Whether to evaluate all questions per subject
73
+ num_questions (int): Number of examples per subject (1-20 or -1 for all)
74
  """
75
 
76
  if not model_loaded:
 
82
  # Convert num_subjects to -1 if all_subjects is True
83
  if all_subjects:
84
  num_subjects = -1
85
+
86
+ # Convert num_questions to -1 if all_questions is True
87
+ if all_questions:
88
+ num_questions = -1
89
 
90
  # Run evaluation
91
  results = evaluate_mmlu(
92
  model,
93
  tokenizer,
94
  num_subjects=num_subjects,
95
+ num_questions=num_questions,
96
  num_shots=num_shots
97
  )
98
 
 
143
  with gr.Row():
144
  all_subjects_checkbox = gr.Checkbox(
145
  label="Evaluate All Subjects",
146
+ value=False, # Default is unchecked
147
  info="When checked, evaluates all 57 MMLU subjects"
148
  )
149
  num_subjects_slider = gr.Slider(
150
  minimum=1,
151
  maximum=57,
152
+ value=10, # Default is 10 subjects
153
  step=1,
154
  label="Number of Subjects",
155
  info="Number of subjects to evaluate (1-57). They will be loaded in alphabetical order.",
 
160
  num_shots_slider = gr.Slider(
161
  minimum=0,
162
  maximum=5,
163
+ value=5, # Default is 5 few-shot examples
164
  step=1,
165
  label="Number of Few-shot Examples",
166
  info="Number of examples to use for few-shot learning (0-5). They will be loaded in alphabetical order."
167
  )
168
+
169
+ with gr.Row():
170
+ all_questions_checkbox = gr.Checkbox(
171
+ label="Evaluate All Questions",
172
+ value=False, # Default is unchecked
173
+ info="When checked, evaluates all available questions for each subject"
174
+ )
175
+ num_questions_slider = gr.Slider(
176
  minimum=1,
177
+ maximum=20,
178
+ value=10, # Default is 10 questions
179
  step=1,
180
+ label="Questions per Subject",
181
+ info="Choose a subset of questions (1-20), or click the checkbox for All Questions",
182
+ interactive=True
183
  )
184
 
185
  with gr.Row():
 
197
  )
198
 
199
  # Update num_subjects_slider interactivity based on all_subjects checkbox
200
+ def update_subjects_slider(checked):
201
+ if checked:
202
+ return gr.update(value=57, interactive=False)
203
+ else:
204
+ return gr.update(interactive=True)
205
+
206
  all_subjects_checkbox.change(
207
+ fn=update_subjects_slider,
208
  inputs=[all_subjects_checkbox],
209
  outputs=[num_subjects_slider]
210
  )
211
 
212
+ # Update num_questions_slider interactivity based on all_questions checkbox
213
+ def update_questions_slider(checked):
214
+ if checked:
215
+ return gr.update(interactive=False)
216
+ else:
217
+ return gr.update(interactive=True)
218
+
219
+ all_questions_checkbox.change(
220
+ fn=update_questions_slider,
221
+ inputs=[all_questions_checkbox],
222
+ outputs=[num_questions_slider]
223
+ )
224
+
225
  # Connect MMLU evaluation button
226
  eval_mmlu_button.click(
227
  fn=run_mmlu_evaluation,
 
229
  all_subjects_checkbox,
230
  num_subjects_slider,
231
  num_shots_slider,
232
+ all_questions_checkbox,
233
+ num_questions_slider
234
  ],
235
  outputs=results_output
236
  )
237
 
238
+ demo.launch()