Update app.py
app.py
CHANGED
@@ -8,24 +8,40 @@ import re
 
 from huggingface_hub import login
 
+# Login to Hugging Face Hub (if token is available)
 token = os.environ.get("HG_TOKEN")
-
+if token:
+    login(token)
 
+# Load reference dataset
 try:
     dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
     references = {row["id"]: row["text"] for row in dataset}
+    print(f"Loaded {len(references)} reference transcriptions")
 except Exception as e:
+    print(f"Error loading dataset: {str(e)}")
     references = {}
 
+# Initialize or load the leaderboard file
 leaderboard_file = "leaderboard.csv"
 if not os.path.exists(leaderboard_file):
-
+    # Create a new leaderboard with sample data for testing
+    sample_data = [
+        ["MALIBA-AI/bambara-asr-v1", 0.2264, 0.1094, 0.1922, "2025-03-15 10:30:45"],
+        ["whisper-large-v3-bambara", 0.3120, 0.1870, 0.2745, "2025-02-20 14:22:33"]
+    ]
+    pd.DataFrame(sample_data,
+                 columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]).to_csv(leaderboard_file, index=False)
+    print(f"Created new leaderboard file with sample data")
 else:
     leaderboard_df = pd.read_csv(leaderboard_file)
 
+    # Ensure the Combined_Score column exists
     if "Combined_Score" not in leaderboard_df.columns:
         leaderboard_df["Combined_Score"] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
         leaderboard_df.to_csv(leaderboard_file, index=False)
+        print(f"Added Combined_Score column to existing leaderboard")
+    print(f"Loaded leaderboard with {len(leaderboard_df)} entries")
 
 def normalize_text(text):
     """Normalize text for WER/CER calculation"""
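For context, the leaderboard file this hunk creates is a plain five-column CSV. A quick sketch of inspecting it (the column names and first row come from the `sample_data` above; the relative path assumes the app's working directory):

```python
import pandas as pd

# Read back the leaderboard file created above
leaderboard = pd.read_csv("leaderboard.csv")
print(leaderboard.columns.tolist())
# ['Model_Name', 'WER', 'CER', 'Combined_Score', 'timestamp']
print(leaderboard.sort_values("Combined_Score").iloc[0]["Model_Name"])
# MALIBA-AI/bambara-asr-v1
```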
@@ -62,6 +78,7 @@ def calculate_metrics(predictions_df):
             sample_wer = wer(reference, hypothesis)
             sample_cer = cer(reference, hypothesis)
 
+            # Cap extreme values to prevent outliers from skewing results
             sample_wer = min(sample_wer, 2.0)
             sample_cer = min(sample_cer, 2.0)
 
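The `wer()`/`cer()` calls above appear to come from the jiwer package (the import is outside this diff), so a minimal per-sample sketch with made-up strings looks like this; the 2.0 cap mirrors the added comment:

```python
from jiwer import wer, cer  # assumed source of the wer()/cer() helpers used above

reference = "i ni ce"        # hypothetical reference transcription
hypothesis = "i ni che ce"   # hypothetical model output

sample_wer = min(wer(reference, hypothesis), 2.0)  # cap extreme values at 2.0
sample_cer = min(cer(reference, hypothesis), 2.0)
print(sample_wer, sample_cer)
```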
@@ -77,7 +94,8 @@ def calculate_metrics(predictions_df):
                 "wer": sample_wer,
                 "cer": sample_cer
             })
-        except Exception:
+        except Exception as e:
+            print(f"Error processing sample {id_val}: {str(e)}")
             pass
 
     if not results:
@@ -98,22 +116,25 @@ def format_as_percentage(value):
 
 def prepare_leaderboard_for_display(df, sort_by="Combined_Score"):
     """Format leaderboard for display with ranking and percentages"""
-    if len(df) == 0:
+    if df is None or len(df) == 0:
         return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
 
-
+    # Make a copy to avoid modifying the original
     display_df = df.copy()
 
+    # Sort by the selected metric (lower is better)
     display_df = display_df.sort_values(sort_by)
 
+    # Add ranking column
     display_df.insert(0, "Rank", range(1, len(display_df) + 1))
 
+    # Format numeric columns as percentages
     for col in ["WER", "CER", "Combined_Score"]:
         if col in display_df.columns:
             display_df[f"{col} (%)"] = display_df[col].apply(lambda x: f"{x * 100:.2f}")
-            display_df = display_df.drop(col, axis=1)
 
-    #
+    # Keep both the raw values and percentage displays
+    # This allows for proper sorting while showing formatted values
 
     return display_df
 
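To illustrate what `prepare_leaderboard_for_display` now returns (raw metric columns kept, percentage columns added, rows ranked by the chosen metric), here is a standalone sketch of the same transformation; the model names are invented and the scores are borrowed from the sample rows in the first hunk:

```python
import pandas as pd

df = pd.DataFrame({
    "Model_Name": ["model-b", "model-a"],   # hypothetical entries
    "WER": [0.3120, 0.2264],
    "CER": [0.1870, 0.1094],
    "Combined_Score": [0.2745, 0.1922],
})

display_df = df.sort_values("Combined_Score")                # lower is better
display_df.insert(0, "Rank", range(1, len(display_df) + 1))  # 1-based ranking
for col in ["WER", "CER", "Combined_Score"]:
    display_df[f"{col} (%)"] = display_df[col].apply(lambda x: f"{x * 100:.2f}")

print(display_df[["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)"]])
```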
@@ -133,10 +154,18 @@ def update_ranking(method):
 
         return prepare_leaderboard_for_display(current_lb, sort_column)
 
-    except Exception:
+    except Exception as e:
+        print(f"Error updating ranking: {str(e)}")
         return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
 
 def process_submission(model_name, csv_file):
+    """Process a new model submission"""
+    if not model_name or not model_name.strip():
+        return "Error: Please provide a model name.", None
+
+    if not csv_file:
+        return "Error: Please upload a CSV file.", None
+
     try:
         df = pd.read_csv(csv_file)
 
@@ -162,28 +191,42 @@ def process_submission(model_name, csv_file):
     try:
         avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df)
 
-        # suspiciously low values
+        # Check for suspiciously low values
         if avg_wer < 0.001:
             return "Error: WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None
 
     except Exception as e:
         return f"Error calculating metrics: {str(e)}", None
 
+    # Load existing leaderboard
     leaderboard = pd.read_csv(leaderboard_file)
     timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 
     # Calculate combined score (70% WER, 30% CER)
     combined_score = avg_wer * 0.7 + avg_cer * 0.3
 
-
-
-
-
+    # Check if model already exists
+    if model_name in leaderboard["Model_Name"].values:
+        # Update existing entry
+        idx = leaderboard[leaderboard["Model_Name"] == model_name].index
+        leaderboard.loc[idx, "WER"] = avg_wer
+        leaderboard.loc[idx, "CER"] = avg_cer
+        leaderboard.loc[idx, "Combined_Score"] = combined_score
+        leaderboard.loc[idx, "timestamp"] = timestamp
+        updated_leaderboard = leaderboard
+    else:
+        # Add new entry
+        new_entry = pd.DataFrame(
+            [[model_name, avg_wer, avg_cer, combined_score, timestamp]],
+            columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]
+        )
+        updated_leaderboard = pd.concat([leaderboard, new_entry])
 
-
-    updated_leaderboard =
+    # Sort and save updated leaderboard
+    updated_leaderboard = updated_leaderboard.sort_values("Combined_Score")
     updated_leaderboard.to_csv(leaderboard_file, index=False)
 
+    # Prepare for display
     display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard)
 
     return f"Submission processed successfully! WER: {format_as_percentage(avg_wer)}, CER: {format_as_percentage(avg_cer)}, Combined Score: {format_as_percentage(combined_score)}", display_leaderboard
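The combined score is a plain weighted sum, so a submission can be sanity-checked by hand. Using the WER/CER of the second sample entry from the first hunk:

```python
wer_score, cer_score = 0.3120, 0.1870
combined = wer_score * 0.7 + cer_score * 0.3   # 0.2184 + 0.0561
print(round(combined, 4))                      # 0.2745, i.e. 27.45%
```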
@@ -191,29 +234,56 @@ def process_submission(model_name, csv_file):
     except Exception as e:
         return f"Error processing submission: {str(e)}", None
 
-
+def get_current_leaderboard():
+    """Get the current leaderboard data for display"""
+    try:
+        if os.path.exists(leaderboard_file):
+            current_leaderboard = pd.read_csv(leaderboard_file)
+
+            if "Combined_Score" not in current_leaderboard.columns:
+                current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
+                current_leaderboard.to_csv(leaderboard_file, index=False)
+
+            return current_leaderboard
+        else:
+            return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
+    except Exception as e:
+        print(f"Error getting leaderboard: {str(e)}")
+        return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
+
+def create_leaderboard_table():
+    """Create and format the leaderboard table for display"""
+    leaderboard_data = get_current_leaderboard()
+    return prepare_leaderboard_for_display(leaderboard_data)
+
+with gr.Blocks(title="Bambara ASR Leaderboard", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
         # 🇲🇱 Bambara ASR Leaderboard
 
-        This leaderboard
-        Models are ranked based on
+        This leaderboard tracks and evaluates speech recognition models for the Bambara language.
+        Models are ranked based on Word Error Rate (WER), Character Error Rate (CER), and a combined score.
+
+        ## Current Models Performance
         """
     )
 
+    current_data = get_current_leaderboard()
+
+    # Highlight top-performing model
+    if len(current_data) > 0:
+        best_model = current_data.sort_values("Combined_Score").iloc[0]
+        gr.Markdown(f"""
+        ### π Current Best Model: **{best_model['Model_Name']}**
+        * WER: **{best_model['WER']*100:.2f}%**
+        * CER: **{best_model['CER']*100:.2f}%**
+        * Combined Score: **{best_model['Combined_Score']*100:.2f}%**
+        """)
+
     with gr.Tabs() as tabs:
-        with gr.TabItem("π
-
-
-
-            if "Combined_Score" not in current_leaderboard.columns:
-                current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
-
-            display_leaderboard = prepare_leaderboard_for_display(current_leaderboard)
-        except Exception:
-            display_leaderboard = pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
-
-        gr.Markdown("### Current ASR Model Rankings")
+        with gr.TabItem("π Model Rankings"):
+            # Pre-load the leaderboard data
+            initial_leaderboard = create_leaderboard_table()
 
         ranking_method = gr.Radio(
             ["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"],
@@ -222,7 +292,7 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
             )
 
             leaderboard_view = gr.DataFrame(
-                value=
+                value=initial_leaderboard,
                 interactive=False,
                 label="Models are ranked by selected metric - lower is better"
             )
@@ -233,34 +303,60 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
                 outputs=[leaderboard_view]
             )
 
-            gr.
-
-
-
-
-
-
-
+            with gr.Accordion("Metrics Explanation", open=False):
+                gr.Markdown(
+                    """
+                    ## Understanding ASR Metrics
+
+                    ### Word Error Rate (WER)
+                    WER measures how accurately the ASR system recognizes whole words:
+                    * Lower values indicate better performance
+                    * Calculated as: (Substitutions + Insertions + Deletions) / Total Words
+                    * A WER of 0% means perfect transcription
+                    * A WER of 20% means approximately 1 in 5 words contains an error
+
+                    ### Character Error Rate (CER)
+                    CER measures accuracy at the character level:
+                    * More fine-grained than WER
+                    * Better at capturing partial word matches
+                    * Particularly useful for agglutinative languages like Bambara
+
+                    ### Combined Score
+                    * Weighted average: 70% WER + 30% CER
+                    * Provides a balanced evaluation of model performance
+                    * Used as the primary ranking metric
+                    """
+                )
 
         with gr.TabItem("π Submit New Results"):
             gr.Markdown(
                 """
                 ### Submit a new model for evaluation
 
-                Upload a CSV file with
-
+                Upload a CSV file with the following format:
+                * Must contain exactly two columns: 'id' and 'text'
+                * The 'id' column should match the reference dataset IDs
+                * The 'text' column should contain your model's transcriptions
                 """
             )
 
             with gr.Row():
-                model_name_input = gr.Textbox(
-
+                model_name_input = gr.Textbox(
+                    label="Model Name",
+                    placeholder="e.g., MALIBA-AI/bambara-asr",
+                    info="Use a descriptive name to identify your model"
+                )
+                csv_upload = gr.File(
+                    label="Upload CSV File",
+                    file_types=[".csv"],
+                    info="CSV with columns: id, text"
+                )
 
-            submit_btn = gr.Button("Submit")
+            submit_btn = gr.Button("Submit", variant="primary")
             output_msg = gr.Textbox(label="Status", interactive=False)
             leaderboard_display = gr.DataFrame(
                 label="Updated Leaderboard",
-                value=
+                value=initial_leaderboard,
                 interactive=False
             )
 
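To make the expected upload concrete, here is a minimal sketch of writing a valid submission file. The ids are placeholders and must match the ids in the benchmark's eval split; the text values are invented:

```python
import pandas as pd

predictions = pd.DataFrame({
    "id": ["sample_001", "sample_002"],            # placeholder ids - use the benchmark's real ids
    "text": ["n ye dumuni ke", "a bora so kono"],  # your model's transcriptions (made up here)
})
predictions.to_csv("my_model_predictions.csv", index=False)
```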
@@ -269,6 +365,49 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
                 inputs=[model_name_input, csv_upload],
                 outputs=[output_msg, leaderboard_display]
             )
+
+        with gr.TabItem("π Benchmark Dataset"):
+            gr.Markdown(
+                """
+                ## About the Benchmark Dataset
+
+                This leaderboard uses the **[sudoping01/bambara-speech-recognition-benchmark](https://huggingface.co/datasets/sudoping01/bambara-speech-recognition-benchmark)** dataset:
+
+                * Contains diverse Bambara speech samples
+                * Includes various speakers, accents, and dialects
+                * Covers different speech styles and recording conditions
+                * Professionally transcribed and validated
+
+                ### How to Generate Predictions
+
+                To submit results to this leaderboard:
+
+                1. Download the audio files from the benchmark dataset
+                2. Run your ASR model on the audio files
+                3. Generate a CSV file with 'id' and 'text' columns
+                4. Submit your results using the form in the "Submit New Results" tab
+
+                ### Evaluation Guidelines
+
+                * Text is normalized (lowercase, punctuation removed) before metrics calculation
+                * Extreme outliers are capped to prevent skewing results
+                * All submissions are validated for format and completeness
+                """
+            )
+
+    gr.Markdown(
+        """
+        ---
+        ### About MALIBA-AI
+
+        **MALIBA-AI: Empowering Mali's Future Through Community-Driven AI Innovation**
+
+        *"No Malian Language Left Behind"*
+
+        This leaderboard is maintained by the MALIBA-AI initiative to track progress in Bambara speech recognition technology.
+        For more information, visit [MALIBA-AI on Hugging Face](https://huggingface.co/MALIBA-AI).
+        """
+    )
 
 if __name__ == "__main__":
     demo.launch()
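A rough end-to-end sketch of the four submission steps described in the Benchmark Dataset tab. It assumes the eval split exposes an `audio` column next to `id` (the column name is not visible in this diff) and uses a generic Hugging Face ASR pipeline as a stand-in for your own model:

```python
import pandas as pd
from datasets import load_dataset
from transformers import pipeline

# 1. Load the benchmark's eval split (the same dataset the app loads above)
eval_set = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]

# 2. Transcribe with your ASR model (placeholder checkpoint id below)
asr = pipeline("automatic-speech-recognition", model="your-org/your-bambara-asr")

rows = []
for sample in eval_set:
    result = asr(sample["audio"])  # assumes an "audio" column with array + sampling_rate
    rows.append({"id": sample["id"], "text": result["text"]})

# 3. Write the two-column CSV expected by the "Submit New Results" tab
pd.DataFrame(rows).to_csv("predictions.csv", index=False)

# 4. Upload predictions.csv through the submission form
```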