v1.3
Browse files
app.py
CHANGED
@@ -61,6 +61,37 @@ def format_table(df):
|
|
61 |
|
62 |
return styled_df
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
# Function to create the Gradio interface
|
65 |
def create_leaderboard():
|
66 |
# Convert data to DataFrames
|
@@ -71,6 +102,9 @@ def create_leaderboard():
|
|
71 |
robustness_df = format_table(robustness_df)
|
72 |
context_grounding_df = format_table(context_grounding_df)
|
73 |
|
|
|
|
|
|
|
74 |
# Create Gradio interface with a nice theme
|
75 |
with gr.Blocks(theme=gr.themes.Soft(), title="Financial Model Performance Leaderboard") as demo:
|
76 |
gr.Markdown("# Financial Model Performance Leaderboard")
|
@@ -82,16 +116,38 @@ def create_leaderboard():
|
|
82 |
value=robustness_df,
|
83 |
label="Robustness Results",
|
84 |
wrap=True,
|
85 |
-
elem_classes=["custom-table"]
|
86 |
)
|
87 |
-
with gr.Column():
|
88 |
with gr.Tab("Context Grounding Results"):
|
89 |
gr.DataFrame(
|
90 |
value=context_grounding_df,
|
91 |
label="Context Grounding Results",
|
92 |
wrap=True,
|
93 |
-
elem_classes=["custom-table"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
# Custom CSS for better table appearance (larger font, spacing, and height)
|
97 |
demo.css = """
|
|
|
61 |
|
62 |
return styled_df
|
63 |
|
64 |
+
# Function to calculate top 3 models based on combined score (average of numeric columns)
|
65 |
+
def _unwrap_frame(table):
    """Return the plain DataFrame behind *table*."""
    # format_table() returns `styled_df`; if that is a pandas Styler the raw
    # frame lives on `.data` -- TODO confirm what format_table() returns.
    if not isinstance(table, pd.DataFrame) and hasattr(table, "data"):
        return table.data
    return table


def _numeric_scores(table, columns):
    """Return ``table[columns]`` as floats, dropping any " (...)" suffix."""

    def strip_annotation(cell):
        # Cells such as "0.85 (3)" keep only the leading number; non-string
        # cells are passed through for astype(float) to convert.
        return cell.split(" (")[0] if isinstance(cell, str) else cell

    cleaned = table[columns].apply(lambda column: column.map(strip_annotation))
    return cleaned.astype(float)


def get_top_3_models(robustness_df, context_grounding_df):
    """Rank models by a combined score and return the best (up to) three.

    The combined score of a row is the average of two per-table means: the
    mean of the robustness columns and the mean of the context-grounding
    columns.

    Args:
        robustness_df: Table with a "Model Name" column plus the "Baseline"
            and "Robustness" score columns. Cells may be numbers or strings
            with a trailing " (...)" annotation.
        context_grounding_df: Table with the seven context-grounding score
            columns listed below, in the same formats.

    Returns:
        A DataFrame with columns "Rank", "Model Name" and "Combined Score"
        (rounded to 3 decimals), sorted best first, with at most three rows
        and a fresh 0-based index.

    Raises:
        KeyError: If an expected column is missing.
        ValueError: If a score cell cannot be parsed as a number.
    """
    # Score columns read from each table.
    numeric_cols_robustness = ["Baseline", "Robustness"]
    numeric_cols_context = [
        "Irrelevant Ctx",
        "No Ctx",
        "Ctx Grounding QA",
        "Ctx Grounding TG",
        "Ctx Grounding",
        "Robustness",
        "Compliance",
    ]

    robustness_df = _unwrap_frame(robustness_df)
    context_grounding_df = _unwrap_frame(context_grounding_df)

    # Parse cell by cell; a row-wise apply() would hand the parser a whole
    # Series instead of a single value.
    robustness_scores = _numeric_scores(robustness_df, numeric_cols_robustness)
    context_scores = _numeric_scores(context_grounding_df, numeric_cols_context)

    # Rows of the two tables are paired by index label.
    # NOTE(review): assumes both tables list the models under the same index
    # labels -- confirm against the caller.
    combined_scores = (
        robustness_scores.mean(axis=1) + context_scores.mean(axis=1)
    ) / 2

    combined_df = pd.DataFrame({
        "Model Name": robustness_df["Model Name"],
        "Combined Score": combined_scores,
    })

    # Stable sort keeps tied models in their original relative order.
    winners_df = (
        combined_df
        .sort_values(by="Combined Score", ascending=False, kind="stable")
        .head(3)
        .reset_index(drop=True)
    )

    # Derive ranks from the actual row count so fewer than three models
    # still produces a valid table.
    winners_df.insert(0, "Rank", list(range(1, len(winners_df) + 1)))
    winners_df["Combined Score"] = winners_df["Combined Score"].round(3)

    return winners_df
|
94 |
+
|
95 |
# Function to create the Gradio interface
|
96 |
def create_leaderboard():
|
97 |
# Convert data to DataFrames
|
|
|
102 |
robustness_df = format_table(robustness_df)
|
103 |
context_grounding_df = format_table(context_grounding_df)
|
104 |
|
105 |
+
# Get top 3 winners
|
106 |
+
winners_df = get_top_3_models(robustness_df, context_grounding_df)
|
107 |
+
|
108 |
# Create Gradio interface with a nice theme
|
109 |
with gr.Blocks(theme=gr.themes.Soft(), title="Financial Model Performance Leaderboard") as demo:
|
110 |
gr.Markdown("# Financial Model Performance Leaderboard")
|
|
|
116 |
value=robustness_df,
|
117 |
label="Robustness Results",
|
118 |
wrap=True,
|
119 |
+
elem_classes=["custom-table"]
|
120 |
)
|
|
|
121 |
with gr.Tab("Context Grounding Results"):
|
122 |
gr.DataFrame(
|
123 |
value=context_grounding_df,
|
124 |
label="Context Grounding Results",
|
125 |
wrap=True,
|
126 |
+
elem_classes=["custom-table"]
|
127 |
+
)
|
128 |
+
with gr.Tab("Top 3 Winners"):
|
129 |
+
gr.DataFrame(
|
130 |
+
value=winners_df,
|
131 |
+
label="Top 3 Models",
|
132 |
+
wrap=True,
|
133 |
+
elem_classes=["custom-table"]
|
134 |
)
|
135 |
+
with gr.Tab("About"):
|
136 |
+
gr.HTML("""
|
137 |
+
<div style="padding: 20px;">
|
138 |
+
<h2>About This Leaderboard</h2>
|
139 |
+
<p>This Financial Model Performance Leaderboard compares the performance of various AI models across robustness and context grounding metrics. The data is sourced from evaluations conducted on February 18, 2025, and reflects the models' ability to handle financial tasks under different conditions.</p>
|
140 |
+
<p>For more information, contact us at <a href="mailto:[email protected]">[email protected]</a>.</p>
|
141 |
+
</div>
|
142 |
+
""")
|
143 |
+
with gr.Row():
|
144 |
+
submit_btn = gr.Button("Submit Feedback")
|
145 |
+
output = gr.Textbox(label="Feedback Submission Status", placeholder="Your feedback will appear here...")
|
146 |
+
|
147 |
+
def submit_feedback():
|
148 |
+
return "Thank you for your feedback!"
|
149 |
+
|
150 |
+
submit_btn.click(fn=submit_feedback, inputs=None, outputs=output)
|
151 |
|
152 |
# Custom CSS for better table appearance (larger font, spacing, and height)
|
153 |
demo.css = """
|