Spaces:

Writer
/

Financial_LLM_Performance_Leaderboard

Running

App Files Files Community

wassemgtk committed on Feb 18

Commit

33a05be

verified ·

1 Parent(s): f7f7ef8

v1.1

Browse files

Files changed (1) hide show

app.py +67 -11

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import gradio as gr
 import pandas as pd
-# Data for Table 1: Robustness Results
 robustness_data = {
     "Model Name": [
         "Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
@@ -20,7 +21,7 @@ robustness_data = {
     "Robustness (Δ)": ["0.83 (↓0.12)", "0.84 (↓0.12)", "0.85 (↓0.10)", "0.81 (↓0.16)", "0.90 (↓0.08)", "0.64 (↓0.19)", "0.82 (↓0.13)", "0.86 (↓0.09)", "0.89 (↓0.07)", "0.80 (↓0.14)", "0.70 (↓0.21)", "0.80 (↓0.14)", "0.82 (↓0.13)", "0.75 (↓0.17)", "0.86 (↓0.09)", "0.85 (↓0.10)", "0.84 (↓0.10)", "0.74 (↓0.17)", "0.80 (↓0.15)", "0.82 (↓0.12)", "0.58 (↓0.28)", "0.70 (↓0.18)", "0.63 (↓0.26)", "0.83 (↓0.13)"]
 }
-# Data for Table 2: Context Grounding Results
 context_grounding_data = {
     "Model Name": [
         "Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
@@ -40,21 +41,76 @@ context_grounding_data = {
     "Compliance": [0.76, 0.72, 0.52, 0.59, 0.63, 0.34, 0.40, 0.44, 0.43, 0.41, 0.66, 0.51, 0.49, 0.71, 0.71, 0.80, 0.67, 0.62, 0.68, 0.54, 0.46, 0.35, 0.34, 0.81]
 }
 # Function to create the Gradio interface
 def create_leaderboard():
-    # Convert data to DataFrames for better display
     robustness_df = pd.DataFrame(robustness_data)
     context_grounding_df = pd.DataFrame(context_grounding_data)
-    # Create Gradio interface with two tabs for each table
-    with gr.Blocks(title="Model Performance Leaderboard") as demo:
-        gr.Markdown("# Model Performance Leaderboard")
-        with gr.Tab("Robustness Results"):
-            gr.DataFrame(value=robustness_df, label="Robustness Results", wrap=True)
-        with gr.Tab("Context Grounding Results"):
-            gr.DataFrame(value=context_grounding_df, label="Context Grounding Results", wrap=True)
     return demo

 import gradio as gr
 import pandas as pd
+import numpy as np
+# Data for Table 1: Robustness Results (unchanged, but we'll format it)
 robustness_data = {
     "Model Name": [
         "Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
     "Robustness (Δ)": ["0.83 (↓0.12)", "0.84 (↓0.12)", "0.85 (↓0.10)", "0.81 (↓0.16)", "0.90 (↓0.08)", "0.64 (↓0.19)", "0.82 (↓0.13)", "0.86 (↓0.09)", "0.89 (↓0.07)", "0.80 (↓0.14)", "0.70 (↓0.21)", "0.80 (↓0.14)", "0.82 (↓0.13)", "0.75 (↓0.17)", "0.86 (↓0.09)", "0.85 (↓0.10)", "0.84 (↓0.10)", "0.74 (↓0.17)", "0.80 (↓0.15)", "0.82 (↓0.12)", "0.58 (↓0.28)", "0.70 (↓0.18)", "0.63 (↓0.26)", "0.83 (↓0.13)"]
 }
+# Data for Table 2: Context Grounding Results (unchanged, but we'll format it)
 context_grounding_data = {
     "Model Name": [
         "Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
     "Compliance": [0.76, 0.72, 0.52, 0.59, 0.63, 0.34, 0.40, 0.44, 0.43, 0.41, 0.66, 0.51, 0.49, 0.71, 0.71, 0.80, 0.67, 0.62, 0.68, 0.54, 0.46, 0.35, 0.34, 0.81]
 }
+# Function to bold the highest score per column (excluding "Model Name")
def format_table(df):
    """Return a copy of *df* with the best score in each score column bolded.

    A column is treated as a score column when the base name of its header
    (the text before any " (" suffix) is one of the known score names below.
    Cells may be plain numbers or strings such as "0.83 (↓0.12)"; in the
    latter case only the leading number is compared. Every cell that ties for
    the column maximum is wrapped in Markdown bold markers (``**cell**``).
    "Model Name" and any unrecognised column are left untouched, and the
    input frame is never modified.

    Args:
        df: Leaderboard table with one row per model.

    Returns:
        A new DataFrame; highlighted cells are strings, all other cells keep
        their original values.
    """
    # Compared against the header's base name so that suffixed headers such
    # as "Robustness (Δ)" are recognised as well as the bare "Robustness".
    score_columns = {
        "Baseline", "Irrelevant Ctx", "No Ctx", "Ctx Grounding QA",
        "Ctx Grounding TG", "Ctx Grounding", "Robustness", "Compliance",
    }

    def _score(cell):
        """Return the leading number of a cell, or NaN if it has none."""
        try:
            return float(str(cell).split(" (")[0])
        except ValueError:
            return float("nan")

    styled_df = df.copy()
    for col in df.columns:
        if col == "Model Name":
            continue
        if str(col).split(" (")[0].strip() not in score_columns:
            continue

        scores = [_score(cell) for cell in df[col]]
        valid_scores = [score for score in scores if not np.isnan(score)]
        if not valid_scores:
            # Empty table or no parseable numbers: nothing to highlight.
            continue

        best = max(valid_scores)
        # NaN never compares equal, so unparseable cells are never bolded.
        styled_df[col] = [
            f"**{cell}**" if score == best else cell
            for cell, score in zip(df[col], scores)
        ]
    return styled_df
 # Function to create the Gradio interface
 def create_leaderboard():
+    # Convert data to DataFrames
     robustness_df = pd.DataFrame(robustness_data)
     context_grounding_df = pd.DataFrame(context_grounding_data)
+    # Format tables to bold highest scores
+    robustness_df = format_table(robustness_df)
+    context_grounding_df = format_table(context_grounding_df)
+    # Create Gradio interface with a nice theme
+    with gr.Blocks(theme=gr.themes.Soft(), title="Financial Model Performance Leaderboard") as demo:
+        gr.Markdown("# Financial Model Performance Leaderboard")
+        with gr.Row():
+            with gr.Column():
+                with gr.Tab("Robustness Results"):
+                    gr.DataFrame(
+                        value=robustness_df,
+                        label="Robustness Results",
+                        wrap=True,
+                        height=600,  # Increase table height for longer appearance
+                        elem_classes=["custom-table"]  # Custom CSS class for styling
+                    )
+            with gr.Column():
+                with gr.Tab("Context Grounding Results"):
+                    gr.DataFrame(
+                        value=context_grounding_df,
+                        label="Context Grounding Results",
+                        wrap=True,
+                        height=600,  # Increase table height for longer appearance
+                        elem_classes=["custom-table"]  # Custom CSS class for styling
+                    )
+    # Custom CSS for better table appearance (larger font, spacing)
+    demo.css = """
+    .custom-table {
+        font-size: 16px;  /* Increase font size for readability */
+        line-height: 2;   /* Increase line height for longer rows */
+        border-collapse: collapse;
+    }
+    .custom-table th, .custom-table td {
+        padding: 12px;    /* Increase padding for spacing */
+        border: 1px solid #ddd;
+    }
+    .custom-table th {
+        background-color: #f5f5f5;
+        font-weight: bold;
+    }
+    """
     return demo