wassemgtk committed on
Commit
33a05be
·
verified ·
1 Parent(s): f7f7ef8
Files changed (1) hide show
  1. app.py +67 -11
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import gradio as gr
2
  import pandas as pd
 
3
 
4
- # Data for Table 1: Robustness Results
5
  robustness_data = {
6
  "Model Name": [
7
  "Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
@@ -20,7 +21,7 @@ robustness_data = {
20
  "Robustness (Δ)": ["0.83 (↓0.12)", "0.84 (↓0.12)", "0.85 (↓0.10)", "0.81 (↓0.16)", "0.90 (↓0.08)", "0.64 (↓0.19)", "0.82 (↓0.13)", "0.86 (↓0.09)", "0.89 (↓0.07)", "0.80 (↓0.14)", "0.70 (↓0.21)", "0.80 (↓0.14)", "0.82 (↓0.13)", "0.75 (↓0.17)", "0.86 (↓0.09)", "0.85 (↓0.10)", "0.84 (↓0.10)", "0.74 (↓0.17)", "0.80 (↓0.15)", "0.82 (↓0.12)", "0.58 (↓0.28)", "0.70 (↓0.18)", "0.63 (↓0.26)", "0.83 (↓0.13)"]
21
  }
22
 
23
- # Data for Table 2: Context Grounding Results
24
  context_grounding_data = {
25
  "Model Name": [
26
  "Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
@@ -40,21 +41,76 @@ context_grounding_data = {
40
  "Compliance": [0.76, 0.72, 0.52, 0.59, 0.63, 0.34, 0.40, 0.44, 0.43, 0.41, 0.66, 0.51, 0.49, 0.71, 0.71, 0.80, 0.67, 0.62, 0.68, 0.54, 0.46, 0.35, 0.34, 0.81]
41
  }
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # Function to create the Gradio interface
44
  def create_leaderboard():
45
- # Convert data to DataFrames for better display
46
  robustness_df = pd.DataFrame(robustness_data)
47
  context_grounding_df = pd.DataFrame(context_grounding_data)
48
 
49
- # Create Gradio interface with two tabs for each table
50
- with gr.Blocks(title="Model Performance Leaderboard") as demo:
51
- gr.Markdown("# Model Performance Leaderboard")
52
-
53
- with gr.Tab("Robustness Results"):
54
- gr.DataFrame(value=robustness_df, label="Robustness Results", wrap=True)
 
55
 
56
- with gr.Tab("Context Grounding Results"):
57
- gr.DataFrame(value=context_grounding_df, label="Context Grounding Results", wrap=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  return demo
60
 
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import numpy as np
4
 
5
+ # Data for Table 1: Robustness Results (unchanged, but we'll format it)
6
  robustness_data = {
7
  "Model Name": [
8
  "Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
 
21
  "Robustness (Δ)": ["0.83 (↓0.12)", "0.84 (↓0.12)", "0.85 (↓0.10)", "0.81 (↓0.16)", "0.90 (↓0.08)", "0.64 (↓0.19)", "0.82 (↓0.13)", "0.86 (↓0.09)", "0.89 (↓0.07)", "0.80 (↓0.14)", "0.70 (↓0.21)", "0.80 (↓0.14)", "0.82 (↓0.13)", "0.75 (↓0.17)", "0.86 (↓0.09)", "0.85 (↓0.10)", "0.84 (↓0.10)", "0.74 (↓0.17)", "0.80 (↓0.15)", "0.82 (↓0.12)", "0.58 (↓0.28)", "0.70 (↓0.18)", "0.63 (↓0.26)", "0.83 (↓0.13)"]
22
  }
23
 
24
+ # Data for Table 2: Context Grounding Results (unchanged, but we'll format it)
25
  context_grounding_data = {
26
  "Model Name": [
27
  "Gemini 2.0 Flash Exp", "Gemini 1.5 Pro 002", "OpenAI GPT-4o", "OpenAI o1", "OpenAI o3-mini",
 
41
  "Compliance": [0.76, 0.72, 0.52, 0.59, 0.63, 0.34, 0.40, 0.44, 0.43, 0.41, 0.66, 0.51, 0.49, 0.71, 0.71, 0.80, 0.67, 0.62, 0.68, 0.54, 0.46, 0.35, 0.34, 0.81]
42
  }
43
 
44
+ # Function to bold the highest score per column (excluding "Model Name")
45
def format_table(df):
    """Return a copy of *df* with the top score of each column wrapped in ``**``.

    Every column except "Model Name" is considered. A cell may be a plain
    number (``0.81``) or a string that starts with the score and is followed
    by a parenthesised delta (``"0.83 (-0.12)"``); only the leading number is
    compared. All cells tied for the column maximum are marked.

    Columns that contain any cell that cannot be parsed as a score, and
    columns with no rows, are left unchanged. The input frame is never
    modified.

    Args:
        df: Leaderboard table with one row per model.

    Returns:
        A new DataFrame in which each winning cell is replaced by the string
        ``f"**{cell}**"``; all other cells keep their original value and type.
    """

    def _score(cell):
        # "0.83 (-0.12)" -> 0.83 and 0.83 -> 0.83: keep only the leading number.
        return float(str(cell).split(" (")[0])

    styled_df = df.copy()

    for col in df.columns:
        if col == "Model Name":
            continue

        try:
            scores = [_score(cell) for cell in df[col]]
        except ValueError:
            # Not a score column (free text etc.); leave it as it is.
            continue

        # NaN never compares equal to itself, so this drops missing scores.
        comparable = [score for score in scores if score == score]
        if not comparable:
            # Empty or all-missing column: there is no maximum to highlight.
            continue

        best = max(comparable)
        # A list is assigned positionally, so the frame's index is irrelevant.
        styled_df[col] = [
            f"**{cell}**" if score == best else cell
            for cell, score in zip(df[col], scores)
        ]

    return styled_df
63
+
64
  # Function to create the Gradio interface
65
  def create_leaderboard():
66
+ # Convert data to DataFrames
67
  robustness_df = pd.DataFrame(robustness_data)
68
  context_grounding_df = pd.DataFrame(context_grounding_data)
69
 
70
+ # Format tables to bold highest scores
71
+ robustness_df = format_table(robustness_df)
72
+ context_grounding_df = format_table(context_grounding_df)
73
+
74
+ # Create Gradio interface with a nice theme
75
+ with gr.Blocks(theme=gr.themes.Soft(), title="Financial Model Performance Leaderboard") as demo:
76
+ gr.Markdown("# Financial Model Performance Leaderboard")
77
 
78
+ with gr.Row():
79
+ with gr.Column():
80
+ with gr.Tab("Robustness Results"):
81
+ gr.DataFrame(
82
+ value=robustness_df,
83
+ label="Robustness Results",
84
+ wrap=True,
85
+ height=600, # Increase table height for longer appearance
86
+ elem_classes=["custom-table"] # Custom CSS class for styling
87
+ )
88
+ with gr.Column():
89
+ with gr.Tab("Context Grounding Results"):
90
+ gr.DataFrame(
91
+ value=context_grounding_df,
92
+ label="Context Grounding Results",
93
+ wrap=True,
94
+ height=600, # Increase table height for longer appearance
95
+ elem_classes=["custom-table"] # Custom CSS class for styling
96
+ )
97
+
98
+ # Custom CSS for better table appearance (larger font, spacing)
99
+ demo.css = """
100
+ .custom-table {
101
+ font-size: 16px; /* Increase font size for readability */
102
+ line-height: 2; /* Increase line height for longer rows */
103
+ border-collapse: collapse;
104
+ }
105
+ .custom-table th, .custom-table td {
106
+ padding: 12px; /* Increase padding for spacing */
107
+ border: 1px solid #ddd;
108
+ }
109
+ .custom-table th {
110
+ background-color: #f5f5f5;
111
+ font-weight: bold;
112
+ }
113
+ """
114
 
115
  return demo
116