Commit
e1da145
·
verified ·
1 Parent(s): 64f6484

update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -10
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import gradio as gr
2
- from gradio_leaderboard import Leaderboard
3
  import pandas as pd
4
  import os
5
  import json
@@ -10,7 +9,7 @@ from src.envs import EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH
10
  # Ensure directories exist
11
  os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)
12
 
13
- # Minimal CSS to avoid conflicts
14
  minimal_css = """
15
  .container {
16
  max-width: 1200px;
@@ -26,7 +25,6 @@ try:
26
  # Load the leaderboard DataFrame
27
  LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
28
  print("LEADERBOARD_DF Shape:", LEADERBOARD_DF.shape)
29
- print("Sample row:", LEADERBOARD_DF.iloc[0].to_dict() if not LEADERBOARD_DF.empty else "Empty DataFrame")
30
 
31
  # If DataFrame is empty, create a sample
32
  if LEADERBOARD_DF.empty:
@@ -45,7 +43,29 @@ except Exception as e:
45
  "average": 0
46
  }])
47
 
48
- # Create a very simple app with just the leaderboard
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  with gr.Blocks(css=minimal_css) as demo:
50
  gr.HTML("<div class='header'><h1>ILMAAM: Index for Language Models for Arabic Assessment on Multitasks</h1></div>")
51
 
@@ -53,15 +73,75 @@ with gr.Blocks(css=minimal_css) as demo:
53
  with gr.TabItem("LLM Benchmark"):
54
  # Add debug output
55
  with gr.Accordion("Debug Info", open=True):
56
- gr.Markdown(f"DataFrame Shape: {LEADERBOARD_DF.shape}")
57
- gr.Markdown(f"Column Names: {', '.join(LEADERBOARD_DF.columns[:10])}...")
58
 
59
- # Create a simplified version of the leaderboard
60
- leaderboard = Leaderboard(
61
- value=LEADERBOARD_DF,
62
- interactive=True,
 
 
63
  )
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  with gr.TabItem("About"):
66
  gr.Markdown("This is a benchmark for Arabic language models.")
67
 
 
1
  import gradio as gr
 
2
  import pandas as pd
3
  import os
4
  import json
 
9
  # Ensure directories exist
10
  os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)
11
 
12
+ # Minimal CSS
13
  minimal_css = """
14
  .container {
15
  max-width: 1200px;
 
25
  # Load the leaderboard DataFrame
26
  LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
27
  print("LEADERBOARD_DF Shape:", LEADERBOARD_DF.shape)
 
28
 
29
  # If DataFrame is empty, create a sample
30
  if LEADERBOARD_DF.empty:
 
43
  "average": 0
44
  }])
45
 
46
# Candidate columns for the table, in display order: model identity and
# overall score first, then a few subject scores, then model metadata.
subject_cols = ["abstract_algebra", "anatomy", "astronomy", "business_ethics"]
meta_cols = ["model_type", "precision", "weight_type", "license"]

display_cols = ["model_name", "average"]
display_cols += [name for name in subject_cols if name in LEADERBOARD_DF.columns]
display_cols += [name for name in meta_cols if name in LEADERBOARD_DF.columns]

# "model_name" and "average" were added unconditionally above, so drop any
# candidate that the loaded DataFrame does not actually contain.
actual_display_cols = [name for name in display_cols if name in LEADERBOARD_DF.columns]
display_df = LEADERBOARD_DF[actual_display_cols].copy()

# Round every numeric column to two decimals for display.
numeric_cols = [
    name for name in display_df.columns
    if pd.api.types.is_numeric_dtype(display_df[name])
]
for name in numeric_cols:
    display_df[name] = display_df[name].round(2)
67
+
68
+ # Create a very simple app using standard DataTable instead of Leaderboard
69
  with gr.Blocks(css=minimal_css) as demo:
70
  gr.HTML("<div class='header'><h1>ILMAAM: Index for Language Models for Arabic Assessment on Multitasks</h1></div>")
71
 
 
73
  with gr.TabItem("LLM Benchmark"):
74
  # Add debug output
75
  with gr.Accordion("Debug Info", open=True):
76
+ gr.Markdown(f"DataFrame Shape: {display_df.shape}")
77
+ gr.Markdown(f"Column Names: {', '.join(display_df.columns)}")
78
 
79
+ # Use standard DataTable instead of Leaderboard
80
+ datatable = gr.DataFrame(
81
+ value=display_df,
82
+ interactive=False,
83
+ wrap=True,
84
+ column_widths=[200] + [100] * (len(actual_display_cols) - 1)
85
  )
86
 
87
+ # Add filter functionality using dropdowns
88
with gr.Row():
    # A dropdown is only created for metadata columns that made it into
    # display_df; the search box is always created.
    if "model_type" in display_df.columns:
        # dropna() first: sorted() raises TypeError when missing values
        # (NaN/None) are mixed in with the string labels.
        model_types = ["All"] + sorted(display_df["model_type"].dropna().unique().tolist())
        model_type_filter = gr.Dropdown(
            choices=model_types,
            value="All",
            label="Filter by Model Type",
            interactive=True
        )

    if "precision" in display_df.columns:
        # Same missing-value guard as for model_type above.
        precisions = ["All"] + sorted(display_df["precision"].dropna().unique().tolist())
        precision_filter = gr.Dropdown(
            choices=precisions,
            value="All",
            label="Filter by Precision",
            interactive=True
        )

    search_input = gr.Textbox(
        label="Search by Model Name",
        placeholder="Enter model name...",
        interactive=True
    )
112
+
113
+ # Filter function
114
def filter_data(model_type, precision, search, df=None):
    """Return the rows of the display table that match the active filters.

    Args:
        model_type: Selected model type; "All" or an empty/None value
            disables this filter.
        precision: Selected precision; "All" or an empty/None value
            disables this filter.
        search: Text to look for in "model_name", matched literally and
            case-insensitively. An empty/None value disables the search.
        df: Table to filter. Defaults to the module-level ``display_df``.

    Returns:
        A filtered copy; the source table is never modified.
    """
    source = display_df if df is None else df
    filtered_df = source.copy()

    # A cleared dropdown can deliver None/""; treat that like "All" instead
    # of comparing the column against None and returning nothing.
    if model_type and model_type != "All" and "model_type" in filtered_df.columns:
        filtered_df = filtered_df[filtered_df["model_type"] == model_type]

    if precision and precision != "All" and "precision" in filtered_df.columns:
        filtered_df = filtered_df[filtered_df["precision"] == precision]

    if search and "model_name" in filtered_df.columns:
        # regex=False: the text comes straight from the user, so characters
        # such as "(" or "[" must match literally rather than raise re.error.
        # na=False: rows with a missing name simply do not match.
        name_matches = filtered_df["model_name"].str.contains(
            search, case=False, regex=False, na=False
        )
        filtered_df = filtered_df[name_matches]

    return filtered_df
127
+
128
+ # Connect filters
129
# Pair each filter component with the filter_data parameter it feeds.
# The dropdowns only exist when their column is present, so the number of
# inputs varies between one and three.
filter_bindings = []
if "model_type" in display_df.columns:
    filter_bindings.append(("model_type", model_type_filter))
if "precision" in display_df.columns:
    filter_bindings.append(("precision", precision_filter))
filter_bindings.append(("search", search_input))

filter_names = [name for name, _ in filter_bindings]
filter_inputs = [component for _, component in filter_bindings]

def _apply_filters(*values):
    """Map the positional component values onto filter_data's parameters.

    filter_data always takes (model_type, precision, search). Passing the
    component values straight through would shift them into the wrong
    parameters, or fail with a TypeError, whenever a dropdown is absent.
    Absent filters fall back to their neutral value.
    """
    selected = {"model_type": "All", "precision": "All", "search": ""}
    selected.update(zip(filter_names, values))
    return filter_data(
        selected["model_type"],
        selected["precision"],
        selected["search"],
    )

# Re-run the filtering whenever any of the filter components changes.
for input_component in filter_inputs:
    input_component.change(
        _apply_filters,
        inputs=filter_inputs,
        outputs=datatable
    )
144
+
145
  with gr.TabItem("About"):
146
  gr.Markdown("This is a benchmark for Arabic language models.")
147