wassemgtk committed on
Commit
814faad
·
verified ·
1 Parent(s): c7e5874
Files changed (1) hide show
  1. app.py +59 -3
app.py CHANGED
@@ -61,6 +61,37 @@ def format_table(df):
61
 
62
  return styled_df
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  # Function to create the Gradio interface
65
  def create_leaderboard():
66
  # Convert data to DataFrames
@@ -71,6 +102,9 @@ def create_leaderboard():
71
  robustness_df = format_table(robustness_df)
72
  context_grounding_df = format_table(context_grounding_df)
73
 
 
 
 
74
  # Create Gradio interface with a nice theme
75
  with gr.Blocks(theme=gr.themes.Soft(), title="Financial Model Performance Leaderboard") as demo:
76
  gr.Markdown("# Financial Model Performance Leaderboard")
@@ -82,16 +116,38 @@ def create_leaderboard():
82
  value=robustness_df,
83
  label="Robustness Results",
84
  wrap=True,
85
- elem_classes=["custom-table"] # Custom CSS class for styling
86
  )
87
- with gr.Column():
88
  with gr.Tab("Context Grounding Results"):
89
  gr.DataFrame(
90
  value=context_grounding_df,
91
  label="Context Grounding Results",
92
  wrap=True,
93
- elem_classes=["custom-table"] # Custom CSS class for styling
 
 
 
 
 
 
 
94
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  # Custom CSS for better table appearance (larger font, spacing, and height)
97
  demo.css = """
 
61
 
62
  return styled_df
63
 
64
def _as_frame(table):
    """Return *table* as a plain DataFrame.

    NOTE(review): the caller passes the result of ``format_table``, which
    returns something named ``styled_df``. If that is a pandas ``Styler``
    it cannot be column-indexed directly, so unwrap its ``.data`` frame.
    Confirm against ``format_table``.
    """
    if isinstance(table, pd.DataFrame):
        return table
    return getattr(table, "data", table)


def _parse_score(value):
    """Convert one table cell to a float, ignoring any text from " (" onward."""
    if isinstance(value, str):
        value = value.split(" (", 1)[0]
    return float(value)


def _score_frame(frame, columns):
    """Return ``frame[columns]`` with every cell parsed by ``_parse_score``."""
    # Series.map is element-wise; the original row-wise apply(axis=1) handed
    # the parser an entire row instead of a single cell.
    return frame[columns].apply(lambda column: column.map(_parse_score))


# Function to calculate top 3 models based on combined score (average of numeric columns)
def get_top_3_models(robustness_df, context_grounding_df):
    """Rank models by a combined score and return the best three.

    The combined score of a row is the average of two per-table means: the
    mean of the robustness columns and the mean of the context grounding
    columns. Cells may be numbers or strings; for strings, everything from
    the first " (" onward is discarded before conversion to float.

    Args:
        robustness_df: Table with "Model Name", "Baseline" and "Robustness"
            columns.
        context_grounding_df: Table with the context grounding score columns
            listed in ``numeric_cols_context`` below.

    Returns:
        A DataFrame with columns "Rank", "Model Name" and "Combined Score"
        (rounded to 3 decimals), best model first. It has at most three rows
        and fewer when fewer models are available.

    Raises:
        KeyError: If an expected column is missing from either table.
        ValueError: If a cell cannot be converted to a number.
    """
    numeric_cols_robustness = ["Baseline", "Robustness"]
    numeric_cols_context = [
        "Irrelevant Ctx",
        "No Ctx",
        "Ctx Grounding QA",
        "Ctx Grounding TG",
        "Ctx Grounding",
        "Robustness",
        "Compliance",
    ]

    robustness_frame = _as_frame(robustness_df)
    context_frame = _as_frame(context_grounding_df)

    # Parse both tables with the same cell parser so they are treated alike.
    robustness_scores = _score_frame(robustness_frame, numeric_cols_robustness)
    context_scores = _score_frame(context_frame, numeric_cols_context)

    # NOTE(review): the two tables are combined by row index, and names are
    # taken from the robustness table only. This assumes both tables list the
    # same models in the same order - verify against the data source.
    combined_scores = (
        robustness_scores.mean(axis=1) + context_scores.mean(axis=1)
    ) / 2

    combined_df = pd.DataFrame({
        "Model Name": robustness_frame["Model Name"],
        "Combined Score": combined_scores,
    })

    # A stable sort keeps tied models in their original table order.
    top_3 = combined_df.sort_values(
        by="Combined Score", ascending=False, kind="mergesort"
    ).head(3)

    # Build from plain lists so the rank column always matches the number of
    # winners (a fixed [1, 2, 3] fails when fewer than three models exist)
    # and the result carries a clean 0..n-1 index.
    winners_df = pd.DataFrame({
        "Rank": list(range(1, len(top_3) + 1)),
        "Model Name": top_3["Model Name"].tolist(),
        "Combined Score": top_3["Combined Score"].round(3).tolist(),
    })

    return winners_df
94
+
95
  # Function to create the Gradio interface
96
  def create_leaderboard():
97
  # Convert data to DataFrames
 
102
  robustness_df = format_table(robustness_df)
103
  context_grounding_df = format_table(context_grounding_df)
104
 
105
+ # Get top 3 winners
106
+ winners_df = get_top_3_models(robustness_df, context_grounding_df)
107
+
108
  # Create Gradio interface with a nice theme
109
  with gr.Blocks(theme=gr.themes.Soft(), title="Financial Model Performance Leaderboard") as demo:
110
  gr.Markdown("# Financial Model Performance Leaderboard")
 
116
  value=robustness_df,
117
  label="Robustness Results",
118
  wrap=True,
119
+ elem_classes=["custom-table"]
120
  )
 
121
  with gr.Tab("Context Grounding Results"):
122
  gr.DataFrame(
123
  value=context_grounding_df,
124
  label="Context Grounding Results",
125
  wrap=True,
126
+ elem_classes=["custom-table"]
127
+ )
128
+ with gr.Tab("Top 3 Winners"):
129
+ gr.DataFrame(
130
+ value=winners_df,
131
+ label="Top 3 Models",
132
+ wrap=True,
133
+ elem_classes=["custom-table"]
134
  )
135
+ with gr.Tab("About"):
136
+ gr.HTML("""
137
+ <div style="padding: 20px;">
138
+ <h2>About This Leaderboard</h2>
139
+ <p>This Financial Model Performance Leaderboard compares the performance of various AI models across robustness and context grounding metrics. The data is sourced from evaluations conducted on February 18, 2025, and reflects the models' ability to handle financial tasks under different conditions.</p>
140
+ <p>For more information, contact us at <a href="mailto:[email protected]">[email protected]</a>.</p>
141
+ </div>
142
+ """)
143
+ with gr.Row():
144
+ submit_btn = gr.Button("Submit Feedback")
145
+ output = gr.Textbox(label="Feedback Submission Status", placeholder="Your feedback will appear here...")
146
+
147
+ def submit_feedback():
148
+ return "Thank you for your feedback!"
149
+
150
+ submit_btn.click(fn=submit_feedback, inputs=None, outputs=output)
151
 
152
  # Custom CSS for better table appearance (larger font, spacing, and height)
153
  demo.css = """