v1.1
Browse files
app.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
|
|
3 |
|
4 |
-
# Data for Table 1: Robustness Results
|
5 |
robustness_data = {
|
6 |
"Model Name": [
|
7 |
"Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
|
@@ -20,7 +21,7 @@ robustness_data = {
|
|
20 |
"Robustness (Ξ)": ["0.83 (β0.12)", "0.84 (β0.12)", "0.85 (β0.10)", "0.81 (β0.16)", "0.90 (β0.08)", "0.64 (β0.19)", "0.82 (β0.13)", "0.86 (β0.09)", "0.89 (β0.07)", "0.80 (β0.14)", "0.70 (β0.21)", "0.80 (β0.14)", "0.82 (β0.13)", "0.75 (β0.17)", "0.86 (β0.09)", "0.85 (β0.10)", "0.84 (β0.10)", "0.74 (β0.17)", "0.80 (β0.15)", "0.82 (β0.12)", "0.58 (β0.28)", "0.70 (β0.18)", "0.63 (β0.26)", "0.83 (β0.13)"]
|
21 |
}
|
22 |
|
23 |
-
# Data for Table 2: Context Grounding Results
|
24 |
context_grounding_data = {
|
25 |
"Model Name": [
|
26 |
"Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
|
@@ -40,21 +41,76 @@ context_grounding_data = {
|
|
40 |
"Compliance": [0.76, 0.72, 0.52, 0.59, 0.63, 0.34, 0.40, 0.44, 0.43, 0.41, 0.66, 0.51, 0.49, 0.71, 0.71, 0.80, 0.67, 0.62, 0.68, 0.54, 0.46, 0.35, 0.34, 0.81]
|
41 |
}
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
# Function to create the Gradio interface
|
44 |
def create_leaderboard():
|
45 |
-
# Convert data to DataFrames
|
46 |
robustness_df = pd.DataFrame(robustness_data)
|
47 |
context_grounding_df = pd.DataFrame(context_grounding_data)
|
48 |
|
49 |
-
#
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
55 |
|
56 |
-
with gr.
|
57 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
return demo
|
60 |
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
|
5 |
+
# Data for Table 1: Robustness Results (unchanged, but we'll format it)
|
6 |
robustness_data = {
|
7 |
"Model Name": [
|
8 |
"Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
|
|
|
21 |
"Robustness (Ξ)": ["0.83 (β0.12)", "0.84 (β0.12)", "0.85 (β0.10)", "0.81 (β0.16)", "0.90 (β0.08)", "0.64 (β0.19)", "0.82 (β0.13)", "0.86 (β0.09)", "0.89 (β0.07)", "0.80 (β0.14)", "0.70 (β0.21)", "0.80 (β0.14)", "0.82 (β0.13)", "0.75 (β0.17)", "0.86 (β0.09)", "0.85 (β0.10)", "0.84 (β0.10)", "0.74 (β0.17)", "0.80 (β0.15)", "0.82 (β0.12)", "0.58 (β0.28)", "0.70 (β0.18)", "0.63 (β0.26)", "0.83 (β0.13)"]
|
22 |
}
|
23 |
|
24 |
+
# Data for Table 2: Context Grounding Results (unchanged, but we'll format it)
|
25 |
context_grounding_data = {
|
26 |
"Model Name": [
|
27 |
"Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
|
|
|
41 |
"Compliance": [0.76, 0.72, 0.52, 0.59, 0.63, 0.34, 0.40, 0.44, 0.43, 0.41, 0.66, 0.51, 0.49, 0.71, 0.71, 0.80, 0.67, 0.62, 0.68, 0.54, 0.46, 0.35, 0.34, 0.81]
|
42 |
}
|
43 |
|
44 |
+
# Function to bold the highest score per column (excluding "Model Name")
|
45 |
+
def format_table(df):
    """Return a copy of ``df`` with the top score of each score column bolded.

    A cell is either a plain number (``0.81``) or a string whose leading
    number is followed by a parenthesized annotation (``"0.95 (0.0)"``).
    Only the leading number takes part in the comparison. Every cell that
    ties for the column maximum is wrapped in Markdown bold markers
    (``**...**``); all other cells are left untouched.

    Args:
        df: Leaderboard table. The ``"Model Name"`` column and any column
            whose name is not a known score column are copied unchanged.

    Returns:
        A new DataFrame; ``df`` itself is not modified.

    Raises:
        ValueError: If a cell in a score column has no parseable leading
            number.
    """
    score_columns = {
        "Baseline",
        "Irrelevant Ctx",
        "No Ctx",
        "Ctx Grounding QA",
        "Ctx Grounding TG",
        "Ctx Grounding",
        "Robustness",
        "Compliance",
    }

    def _leading_number(cell):
        # "0.95 (0.0)" -> 0.95; a plain 0.95 passes through float() as-is.
        return float(str(cell).split(" (")[0])

    styled_df = df.copy()

    for col in df.columns:
        # Compare on the base name so a header carrying a parenthesized
        # suffix (the robustness table spells its column "Robustness (...)")
        # is still recognised as a score column.
        if str(col).split(" (")[0] not in score_columns:
            continue

        scores = df[col].map(_leading_number)
        if scores.empty:
            # Nothing to rank; taking a max here would raise.
            continue

        max_value = scores.max()
        styled_df[col] = [
            f"**{cell}**" if score == max_value else cell
            for cell, score in zip(df[col], scores)
        ]

    return styled_df
|
63 |
+
|
64 |
# Function to create the Gradio interface
|
65 |
def create_leaderboard():
    """Build the Gradio app that shows the two leaderboard tables.

    The module-level ``robustness_data`` and ``context_grounding_data``
    dictionaries are turned into DataFrames, passed through
    ``format_table`` so the best score per column is bolded, and placed
    side by side in a themed ``gr.Blocks`` layout.

    Returns:
        The assembled ``gr.Blocks`` object. It is not launched here.
    """
    # format_table can leave a column holding both floats and bolded strings.
    # Casting to str gives the Markdown renderer uniform text cells.
    robustness_df = format_table(pd.DataFrame(robustness_data)).astype(str)
    context_grounding_df = format_table(
        pd.DataFrame(context_grounding_data)
    ).astype(str)

    # Custom CSS for better table appearance (larger font, spacing)
    custom_css = """
    .custom-table {
        font-size: 16px; /* Increase font size for readability */
        line-height: 2; /* Increase line height for longer rows */
        border-collapse: collapse;
    }
    .custom-table th, .custom-table td {
        padding: 12px; /* Increase padding for spacing */
        border: 1px solid #ddd;
    }
    .custom-table th {
        background-color: #f5f5f5;
        font-weight: bold;
    }
    """

    # The stylesheet goes through the constructor, next to theme and title,
    # instead of being assigned to demo.css after the context has closed.
    with gr.Blocks(
        theme=gr.themes.Soft(),
        title="Financial Model Performance Leaderboard",
        css=custom_css,
    ) as demo:
        gr.Markdown("# Financial Model Performance Leaderboard")

        with gr.Row():
            with gr.Column():
                with gr.Tab("Robustness Results"):
                    gr.DataFrame(
                        value=robustness_df,
                        label="Robustness Results",
                        # Without the markdown datatype the "**...**"
                        # markers are shown literally instead of as bold.
                        datatype="markdown",
                        wrap=True,
                        height=600,  # Increase table height for longer appearance
                        elem_classes=["custom-table"],  # Custom CSS class for styling
                    )
            with gr.Column():
                with gr.Tab("Context Grounding Results"):
                    gr.DataFrame(
                        value=context_grounding_df,
                        label="Context Grounding Results",
                        datatype="markdown",
                        wrap=True,
                        height=600,  # Increase table height for longer appearance
                        elem_classes=["custom-table"],  # Custom CSS class for styling
                    )

    return demo
|
116 |
|