Commit 64f6484 · verified · 1 Parent(s): 8767411

update app.py

Files changed (1)
  1. app.py +47 -256
app.py CHANGED
@@ -1,280 +1,71 @@
  import gradio as gr
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
  import pandas as pd
  import os
  import json
- from apscheduler.schedulers.background import BackgroundScheduler
- from huggingface_hub import snapshot_download
-
- from src.about import (
-     CITATION_BUTTON_LABEL,
-     CITATION_BUTTON_TEXT,
-     EVALUATION_QUEUE_TEXT,
-     INTRODUCTION_TEXT,
-     LLM_BENCHMARKS_TEXT,
-     TITLE,
- )
- from src.display.css_html_js import custom_css
- from src.display.utils import (
-     COLUMNS,
-     COLS,
-     BENCHMARK_COLS,
-     EVAL_COLS,
-     EVAL_TYPES,
-     ModelType,
-     WeightType,
-     Precision
- )
-
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
- from src.submission.submit import add_new_eval
-
- def restart_space():
-     try:
-         API.restart_space(repo_id=REPO_ID)
-     except Exception as e:
-         print(f"Error restarting space: {e}")

  # Ensure directories exist
- os.makedirs(EVAL_REQUESTS_PATH, exist_ok=True)
  os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)

- ### Space initialization
- try:
-     print(f"Downloading evaluation requests from {QUEUE_REPO} to {EVAL_REQUESTS_PATH}")
-     snapshot_download(
-         repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset",
-         tqdm_class=None, etag_timeout=30, token=TOKEN
-     )
-     print("Successfully downloaded evaluation requests")
- except Exception as e:
-     print(f"Error downloading evaluation requests: {e}")
-     # Don't restart immediately, try to continue

  try:
-     print(f"Downloading evaluation results from {RESULTS_REPO} to {EVAL_RESULTS_PATH}")
-     snapshot_download(
-         repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset",
-         tqdm_class=None, etag_timeout=30, token=TOKEN
-     )
-     print("Successfully downloaded evaluation results")
- except Exception as e:
-     print(f"Error downloading evaluation results: {e}")
-     # Don't restart immediately, try to continue
-
- # Add fallback data in case the remote fetch fails
- fallback_data = False
- if not os.listdir(EVAL_RESULTS_PATH):
-     print("No evaluation results found. Creating sample data for testing.")
-     fallback_data = True
-     # Create a sample result file for testing
-     sample_data = {
-         "config": {
-             "model_name": "Sample Arabic Model",
-             "submitted_time": "2023-01-01",
-             "base_model": "bert-base-arabic",
-             "revision": "main",
-             "precision": "float16",
-             "weight_type": "Original",
-             "model_type": "Encoder",
-             "license": "Apache-2.0",
-             "params": 110000000,
-             "still_on_hub": True
-         },
-         "results": {
-             "average": 75.5,
-             "abstract_algebra": 70.2,
-             "anatomy": 72.5,
-             "astronomy": 80.1,
-             "business_ethics": 68.3,
-             "clinical_knowledge": 75.0,
-             "college_biology": 77.4,
-             "college_chemistry": 74.2
-         }
-     }
-
-     with open(os.path.join(EVAL_RESULTS_PATH, "sample_result.json"), 'w') as f:
-         json.dump(sample_data, f)
-
- # Load the leaderboard DataFrame
- try:
      LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
      print("LEADERBOARD_DF Shape:", LEADERBOARD_DF.shape)
-     print("LEADERBOARD_DF Columns:", LEADERBOARD_DF.columns.tolist())
-     print("LEADERBOARD_DF Sample:", LEADERBOARD_DF.head(1).to_dict('records') if not LEADERBOARD_DF.empty else "Empty DataFrame")
-
-     # If DataFrame is empty even with fallback data, create a minimal sample
-     if LEADERBOARD_DF.empty and fallback_data:
-         print("Creating minimal sample data for leaderboard")
          LEADERBOARD_DF = pd.DataFrame([{
-             "model_name": "Sample Arabic LLM",
-             "submitted_time": "2023-01-01",
-             "base_model": "bert-base-arabic",
-             "revision": "main",
-             "precision": "float16",
-             "weight_type": "Original",
-             "model_type": "Encoder",
-             "license": "Apache-2.0",
-             "params": 110000000,
-             "still_on_hub": True,
              "average": 75.5,
-             "abstract_algebra": 70.2,
-             "anatomy": 72.5,
-             "astronomy": 80.1,
-             "business_ethics": 68.3,
-             "clinical_knowledge": 75.0,
-             "college_biology": 77.4,
-             "college_chemistry": 74.2
          }])
  except Exception as e:
      print(f"Error loading leaderboard data: {e}")
-     # Create a minimal sample DataFrame
      LEADERBOARD_DF = pd.DataFrame([{
          "model_name": "Error Loading Data",
          "average": 0
      }])

- # Load the evaluation queue DataFrames
- try:
-     finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
- except Exception as e:
-     print(f"Error loading evaluation queue data: {e}")
-     # Create empty DataFrames
-     finished_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
-     running_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
-     pending_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
-
- with gr.Blocks(css=custom_css, theme=gr.themes.Default()) as demo:
-     gr.HTML(TITLE)
-     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab", id=0):
-             if LEADERBOARD_DF.empty:
-                 gr.Markdown("No evaluations have been performed yet. The leaderboard is currently empty.")
-             else:
-                 # Debug information as Markdown
-                 gr.Markdown("### Leaderboard Data Debug Info")
                  gr.Markdown(f"DataFrame Shape: {LEADERBOARD_DF.shape}")
-                 gr.Markdown(f"DataFrame Columns: {LEADERBOARD_DF.columns.tolist()}")
-
-                 # Get the default columns to display
-                 default_selection = [col.name for col in COLUMNS if col.displayed_by_default]
-                 print("Default Selection before ensuring 'model_name':", default_selection)
-
-                 # Ensure "model_name" is included
-                 if "model_name" not in default_selection:
-                     default_selection.insert(0, "model_name")
-                 print("Default Selection after ensuring 'model_name':", default_selection)
-
-                 # Make sure all columns exist in the DataFrame
-                 for col in default_selection:
-                     if col not in LEADERBOARD_DF.columns:
-                         print(f"Warning: Column '{col}' not found in DataFrame. Adding empty column.")
-                         LEADERBOARD_DF[col] = None
-
-                 print("LEADERBOARD_DF dtypes:\n", LEADERBOARD_DF.dtypes)
-
-                 # Create the leaderboard component
-                 leaderboard = Leaderboard(
-                     value=LEADERBOARD_DF,
-                     datatype=[col.type for col in COLUMNS],
-                     select_columns=SelectColumns(
-                         default_selection=default_selection,
-                         cant_deselect=[col.name for col in COLUMNS if col.never_hidden],
-                         label="Select Columns to Display:",
-                     ),
-                     search_columns=["model_name", "license"],
-                     hide_columns=[col.name for col in COLUMNS if col.hidden],
-                     filter_columns=[
-                         ColumnFilter("model_type", type="checkboxgroup", label="Model types"),
-                         ColumnFilter("precision", type="checkboxgroup", label="Precision"),
-                         ColumnFilter(
-                             "still_on_hub", type="boolean", label="Deleted/incomplete", default=True
-                         ),
-                     ],
-                     bool_checkboxgroup_label="Hide models",
-                     interactive=True,  # Change to True to enable interaction
-                 )
-
-         with gr.TabItem("📝 About", elem_id="about-tab", id=1):
-             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-         with gr.TabItem("🚀 Submit here!", elem_id="submit-tab", id=2):
-             with gr.Column():
-                 with gr.Row():
-                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                 # Since the evaluation queues are empty, display a message
-                 with gr.Column():
-                     gr.Markdown("Evaluations are performed immediately upon submission. There are no pending or running evaluations.")
-
-                 with gr.Row():
-                     gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-                 with gr.Row():
-                     with gr.Column():
-                         model_name_textbox = gr.Textbox(label="Model name")
-                         revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                         model_type = gr.Dropdown(
-                             choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                             label="Model type",
-                             multiselect=False,
-                             value=None,
-                             interactive=True,
-                         )
-
-                     with gr.Column():
-                         precision = gr.Dropdown(
-                             choices=[i.value for i in Precision if i != Precision.Unknown],
-                             label="Precision",
-                             multiselect=False,
-                             value="float16",
-                             interactive=True,
-                         )
-                         weight_type = gr.Dropdown(
-                             choices=[i.value for i in WeightType],
-                             label="Weights type",
-                             multiselect=False,
-                             value="Original",
-                             interactive=True,
-                         )
-                         base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-                 submit_button = gr.Button("Submit Eval")
-                 submission_result = gr.Markdown()
-                 submit_button.click(
-                     add_new_eval,
-                     [
-                         model_name_textbox,
-                         base_model_name_textbox,
-                         revision_name_textbox,
-                         precision,
-                         weight_type,
-                         model_type,
-                     ],
-                     submission_result,
                  )
-
-     with gr.Row():
-         with gr.Accordion("📙 Citation", open=False):
-             citation_button = gr.Textbox(
-                 value=CITATION_BUTTON_TEXT,
-                 label=CITATION_BUTTON_LABEL,
-                 lines=20,
-                 elem_id="citation-button",
-                 show_copy_button=True,
-             )
-
- scheduler = BackgroundScheduler()
- # Run every 30 minutes instead of every 30 seconds (1800 seconds = 30 minutes)
- scheduler.add_job(restart_space, "interval", seconds=1800)
- scheduler.start()
-
- # Launch with a more descriptive message
- demo.queue(default_concurrency_limit=40).launch(
-     debug=True,
-     share=False,
-     show_error=True
- )
 
  import gradio as gr
+ from gradio_leaderboard import Leaderboard
  import pandas as pd
  import os
  import json
+ from src.populate import get_leaderboard_df
+ from src.display.utils import COLUMNS, COLS, BENCHMARK_COLS
+ from src.envs import EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH

  # Ensure directories exist
  os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)

+ # Minimal CSS to avoid conflicts
+ minimal_css = """
+ .container {
+     max-width: 1200px;
+     margin: 0 auto;
+ }
+ .header {
+     text-align: center;
+     margin-bottom: 20px;
+ }
+ """

  try:
+     # Load the leaderboard DataFrame
      LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
      print("LEADERBOARD_DF Shape:", LEADERBOARD_DF.shape)
+     print("Sample row:", LEADERBOARD_DF.iloc[0].to_dict() if not LEADERBOARD_DF.empty else "Empty DataFrame")
+
+     # If DataFrame is empty, create a sample
+     if LEADERBOARD_DF.empty:
+         print("Creating sample data for testing")
          LEADERBOARD_DF = pd.DataFrame([{
+             "model_name": "Sample Model",
              "average": 75.5,
+             "model_type": "Encoder",
+             "precision": "float16"
          }])
  except Exception as e:
      print(f"Error loading leaderboard data: {e}")
+     # Create a minimal DataFrame
      LEADERBOARD_DF = pd.DataFrame([{
          "model_name": "Error Loading Data",
          "average": 0
      }])

+ # Create a very simple app with just the leaderboard
+ with gr.Blocks(css=minimal_css) as demo:
+     gr.HTML("<div class='header'><h1>ILMAAM: Index for Language Models for Arabic Assessment on Multitasks</h1></div>")
+
+     with gr.Tabs() as tabs:
+         with gr.TabItem("LLM Benchmark"):
+             # Add debug output
+             with gr.Accordion("Debug Info", open=True):
                  gr.Markdown(f"DataFrame Shape: {LEADERBOARD_DF.shape}")
+                 gr.Markdown(f"Column Names: {', '.join(LEADERBOARD_DF.columns[:10])}...")
+
+             # Create a simplified version of the leaderboard
+             leaderboard = Leaderboard(
+                 value=LEADERBOARD_DF,
+                 interactive=True,
              )
+
+         with gr.TabItem("About"):
+             gr.Markdown("This is a benchmark for Arabic language models.")
+
+         with gr.TabItem("Submit"):
+             gr.Markdown("Submission form will be available here.")
+
+ demo.launch(debug=True, share=False)