v1.1
Browse files
app.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
|
|
|
| 3 |
|
| 4 |
-
# Data for Table 1: Robustness Results
|
| 5 |
robustness_data = {
|
| 6 |
"Model Name": [
|
| 7 |
"Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
|
|
@@ -20,7 +21,7 @@ robustness_data = {
|
|
| 20 |
"Robustness (Ξ)": ["0.83 (β0.12)", "0.84 (β0.12)", "0.85 (β0.10)", "0.81 (β0.16)", "0.90 (β0.08)", "0.64 (β0.19)", "0.82 (β0.13)", "0.86 (β0.09)", "0.89 (β0.07)", "0.80 (β0.14)", "0.70 (β0.21)", "0.80 (β0.14)", "0.82 (β0.13)", "0.75 (β0.17)", "0.86 (β0.09)", "0.85 (β0.10)", "0.84 (β0.10)", "0.74 (β0.17)", "0.80 (β0.15)", "0.82 (β0.12)", "0.58 (β0.28)", "0.70 (β0.18)", "0.63 (β0.26)", "0.83 (β0.13)"]
|
| 21 |
}
|
| 22 |
|
| 23 |
-
# Data for Table 2: Context Grounding Results
|
| 24 |
context_grounding_data = {
|
| 25 |
"Model Name": [
|
| 26 |
"Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
|
|
@@ -40,21 +41,76 @@ context_grounding_data = {
|
|
| 40 |
"Compliance": [0.76, 0.72, 0.52, 0.59, 0.63, 0.34, 0.40, 0.44, 0.43, 0.41, 0.66, 0.51, 0.49, 0.71, 0.71, 0.80, 0.67, 0.62, 0.68, 0.54, 0.46, 0.35, 0.34, 0.81]
|
| 41 |
}
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
# Function to create the Gradio interface
|
| 44 |
def create_leaderboard():
|
| 45 |
-
# Convert data to DataFrames
|
| 46 |
robustness_df = pd.DataFrame(robustness_data)
|
| 47 |
context_grounding_df = pd.DataFrame(context_grounding_data)
|
| 48 |
|
| 49 |
-
#
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
| 55 |
|
| 56 |
-
with gr.
|
| 57 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
return demo
|
| 60 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
|
| 5 |
+
# Data for Table 1: Robustness Results (unchanged, but we'll format it)
|
| 6 |
robustness_data = {
|
| 7 |
"Model Name": [
|
| 8 |
"Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
|
|
|
|
| 21 |
"Robustness (Ξ)": ["0.83 (β0.12)", "0.84 (β0.12)", "0.85 (β0.10)", "0.81 (β0.16)", "0.90 (β0.08)", "0.64 (β0.19)", "0.82 (β0.13)", "0.86 (β0.09)", "0.89 (β0.07)", "0.80 (β0.14)", "0.70 (β0.21)", "0.80 (β0.14)", "0.82 (β0.13)", "0.75 (β0.17)", "0.86 (β0.09)", "0.85 (β0.10)", "0.84 (β0.10)", "0.74 (β0.17)", "0.80 (β0.15)", "0.82 (β0.12)", "0.58 (β0.28)", "0.70 (β0.18)", "0.63 (β0.26)", "0.83 (β0.13)"]
|
| 22 |
}
|
| 23 |
|
| 24 |
+
# Data for Table 2: Context Grounding Results (unchanged, but we'll format it)
|
| 25 |
context_grounding_data = {
|
| 26 |
"Model Name": [
|
| 27 |
"Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
|
|
|
|
| 41 |
"Compliance": [0.76, 0.72, 0.52, 0.59, 0.63, 0.34, 0.40, 0.44, 0.43, 0.41, 0.66, 0.51, 0.49, 0.71, 0.71, 0.80, 0.67, 0.62, 0.68, 0.54, 0.46, 0.35, 0.34, 0.81]
|
| 42 |
}
|
| 43 |
|
| 44 |
+
# Function to bold the highest score per column (excluding "Model Name")
|
| 45 |
+
def format_table(df):
    """Return a copy of ``df`` with the best score of each score column bolded.

    A column is treated as a score column when the part of its header that
    precedes an optional " (...)" suffix is one of the known metric names.
    Matching on that prefix means a header such as "Robustness (<symbol>)"
    is recognized as the "Robustness" metric instead of being skipped.

    Cells may be plain numbers (``0.95``) or strings that carry a trailing
    annotation (``"0.95 (0.0)"``); in both cases the leading number is the
    score. Every cell whose score equals the column maximum is wrapped in
    Markdown bold markers (``**...**``); ties are all bolded. Other cells and
    all non-score columns (e.g. "Model Name") are left unchanged.

    Args:
        df: Input table. It is not modified.

    Returns:
        A new DataFrame with the same shape and column order as ``df``.
    """
    score_columns = {
        "Baseline",
        "Irrelevant Ctx",
        "No Ctx",
        "Ctx Grounding QA",
        "Ctx Grounding TG",
        "Ctx Grounding",
        "Robustness",
        "Compliance",
    }

    def leading_score(cell):
        # "0.95 (0.0)" -> 0.95; a bare number round-trips through str().
        return float(str(cell).split(" (", 1)[0])

    styled_df = df.copy()

    for col in df.columns:
        # Compare only the header text before any " (...)" suffix.
        if str(col).split(" (", 1)[0] not in score_columns:
            continue

        scores = [leading_score(cell) for cell in df[col]]
        if not scores:
            # np.max raises on an empty sequence; nothing to highlight anyway.
            continue

        best = np.max(scores)
        styled_df[col] = [
            f"**{cell}**" if score == best else cell
            for cell, score in zip(df[col], scores)
        ]

    return styled_df
|
| 63 |
+
|
| 64 |
# Function to create the Gradio interface
|
| 65 |
def create_leaderboard():
    """Build the Gradio app that shows the two leaderboard tables.

    Both result dictionaries are converted to DataFrames and passed through
    ``format_table`` so the best score of each column is wrapped in Markdown
    bold markers. The tables are rendered side by side, each inside its own
    tab, using the Soft theme and a small custom stylesheet.

    Returns:
        The ``gr.Blocks`` application, ready to be launched by the caller.
    """
    # Stylesheet for the tables: larger font, taller rows, visible grid.
    table_css = """
    .custom-table {
        font-size: 16px;  /* Increase font size for readability */
        line-height: 2;   /* Increase line height for longer rows */
        border-collapse: collapse;
    }
    .custom-table th, .custom-table td {
        padding: 12px;  /* Increase padding for spacing */
        border: 1px solid #ddd;
    }
    .custom-table th {
        background-color: #f5f5f5;
        font-weight: bold;
    }
    """

    # format_table emits "**...**" for the best scores. Cells are converted to
    # strings so every value in a Markdown-typed column is text.
    robustness_df = format_table(pd.DataFrame(robustness_data)).astype(str)
    context_grounding_df = format_table(
        pd.DataFrame(context_grounding_data)
    ).astype(str)

    def leaderboard_table(frame, label):
        # One tab holding one table; shared by both columns of the layout.
        with gr.Tab(label):
            gr.DataFrame(
                value=frame,
                label=label,
                # Render the "**...**" markers as bold instead of showing
                # the asterisks literally.
                datatype="markdown",
                wrap=True,
                height=600,  # Increase table height for longer appearance
                elem_classes=["custom-table"],  # Custom CSS class for styling
            )

    # The stylesheet is passed to the constructor rather than assigned to
    # ``demo.css`` afterwards, so it does not depend on when Gradio reads
    # that attribute.
    with gr.Blocks(
        theme=gr.themes.Soft(),
        title="Financial Model Performance Leaderboard",
        css=table_css,
    ) as demo:
        gr.Markdown("# Financial Model Performance Leaderboard")

        with gr.Row():
            with gr.Column():
                leaderboard_table(robustness_df, "Robustness Results")
            with gr.Column():
                leaderboard_table(
                    context_grounding_df, "Context Grounding Results"
                )

    return demo
|
| 116 |
|