H2H-eval-comparator

Sleeping

App Files Files Community

rohansampath committed on Feb 27

Commit

9d60cf6

verified ·

1 Parent(s): b5e28ab

Update run_evaluation.py

Browse files

Files changed (1) hide show

run_evaluation.py +11 -25

run_evaluation.py CHANGED Viewed

@@ -119,34 +119,20 @@ def run_mmlu_evaluation(subject_selection_mode, num_subjects, selected_subjects,
         comparison_df = pd.DataFrame(comparison_data)
         # Format the report
-        return (
-            report,                    # Report markdown
-            comparison_df,             # Results dataframe
-            gr.Button.update(interactive=True),  # Enable eval button
-            gr.Button.update(interactive=False), # Disable cancel button
-            gr.Radio.update(interactive=True),   # Enable subject selection mode
-            gr.Slider.update(interactive=True),  # Enable subjects slider
-            gr.Checkbox.update(interactive=True), # Enable all questions checkbox
-            gr.Slider.update(interactive=True),   # Enable questions slider
-            gr.Dropdown.update(interactive=True), # Enable model dropdown
-            gr.Column.update(visible=True)        # Make table container visible
-        )
     except Exception as e:
         # Handle errors gracefully
         error_trace = traceback.format_exc()
         error_message = f"### Error during evaluation\n```\n{error_trace}\n```"
-        # Return error values for all 10 components
-        return (
-            error_message,              # Error message in markdown
-            None,                       # No dataframe
-            gr.Button.update(interactive=True),  # Enable eval button
-            gr.Button.update(interactive=False), # Disable cancel button
-            gr.Radio.update(interactive=True),   # Enable subject selection mode
-            gr.Slider.update(interactive=True),  # Enable subjects slider
-            gr.Checkbox.update(interactive=True), # Enable all questions checkbox
-            gr.Slider.update(interactive=True),   # Enable questions slider
-            gr.Dropdown.update(interactive=True), # Enable model dropdown
-            gr.Column.update(visible=False)       # Hide table container
-        )

         comparison_df = pd.DataFrame(comparison_data)
         # Format the report
+        return {
+            'report': report,
+            'comparison_df': comparison_df,
+            'success': True
+        }
     except Exception as e:
         # Handle errors gracefully
         error_trace = traceback.format_exc()
         error_message = f"### Error during evaluation\n```\n{error_trace}\n```"
+        # Return error information
+        return {
+            'report': error_message,
+            'comparison_df': None,
+            'success': False
+        }