BounharAbdelaziz committed on
Commit
fdfa8a9
·
verified ·
1 Parent(s): 029f92c

Implemented one-vs-all leaderboard

Browse files
Files changed (4) hide show
  1. app.py +49 -28
  2. darija_leaderboard_multilingual.json +1 -1376
  3. requirements.txt +6 -5
  4. utils.py +197 -49
app.py CHANGED
@@ -1,17 +1,19 @@
1
- import os
2
- import pandas as pd
3
  from utils import (
4
  update_leaderboard_multilingual,
 
5
  handle_evaluation,
6
  process_results_file,
7
  create_html_image,
8
  )
 
9
  from datasets import load_dataset
10
  import gradio as gr
11
 
12
  if __name__ == "__main__":
 
13
  # Evaluation dataset path
14
- DATA_PATH = "atlasia/No-Arabic-Dialect-Left-Behind-Filtered-Balanced"
 
15
  # All Metrics
16
  metrics = [
17
  'f1_score',
@@ -43,9 +45,12 @@ if __name__ == "__main__":
43
 
44
  # Load test dataset
45
  test_dataset = load_dataset(DATA_PATH, split='test')
46
- # Supported dialects
47
- supported_dialects = list(test_dataset.unique("dialect")) + ['All']
48
 
 
 
 
 
 
49
  with gr.Blocks() as app:
50
  base_path = os.path.dirname(__file__)
51
  local_image_path = os.path.join(base_path, 'open_arabic_lid_arena.png')
@@ -99,25 +104,26 @@ if __name__ == "__main__":
99
  A kind of one-vs-all approach for evaluating LID models across multiple arabic dialects.
100
  Computes the `false_positive_rate` of different models for a given target language.
101
  This should help you understand how well a model can identify a specific dialect by
102
- showing how often it misclassifies other dialects as the target dialect.
103
  """
104
  )
105
 
106
- with gr.Column(scale=1):
107
- gr.Markdown("### Select target language")
108
- target_language_selector = gr.Dropdown(
109
- choices=supported_dialects,
110
- value='Morocco', # Default to Morocco of course
111
- label="Target Language"
112
- )
113
-
114
- with gr.Column(scale=2):
115
- gr.Markdown("### Select Languages to display")
116
- languages_checkboxes = gr.CheckboxGroup(
117
- choices=supported_dialects,
118
- value=default_languages,
119
- label="Languages"
120
- )
 
121
 
122
  with gr.Row():
123
  binary_leaderboard_table = gr.DataFrame(
@@ -135,7 +141,14 @@ if __name__ == "__main__":
135
  use_mapping = gr.Checkbox(label="Does not map to country")
136
  eval_button = gr.Button("Evaluate", value=False) # Initially disabled
137
 
138
- eval_button.click(handle_evaluation, inputs=[model_path, model_path_bin, use_mapping], outputs=[leaderboard_table])
 
 
 
 
 
 
 
139
 
140
  with gr.Tab("Upload your results"):
141
 
@@ -160,8 +173,10 @@ if __name__ == "__main__":
160
 
161
  ```
162
  """
 
163
  gr.Markdown("## Upload your results to the leaderboard 🚀")
164
  gr.Markdown("### Submission guidelines: Run the test dataset on your model and save the results in a CSV file. Bellow a code snippet to help you with that.")
 
165
  gr.Markdown(code_snippet)
166
 
167
  uploaded_model_name = gr.Textbox(label="Model name", placeholder='Your model/team name')
@@ -184,18 +199,24 @@ if __name__ == "__main__":
184
 
185
  # Update binary table when any input changes
186
  target_language_selector.change(
187
- update_leaderboard_multilingual,
188
- inputs=[country_selector, metric_checkboxes],
189
- outputs=leaderboard_table
190
  )
191
 
192
  languages_checkboxes.change(
193
- update_leaderboard_multilingual,
194
- inputs=[country_selector, metric_checkboxes],
195
- outputs=leaderboard_table
196
  )
197
 
198
  # Define load event to run at startup
 
 
 
 
 
 
199
  app.load(
200
  update_leaderboard_multilingual,
201
  inputs=[country_selector, metric_checkboxes],
 
 
 
1
  from utils import (
2
  update_leaderboard_multilingual,
3
+ update_leaderboard_one_vs_all,
4
  handle_evaluation,
5
  process_results_file,
6
  create_html_image,
7
  )
8
+ import os
9
  from datasets import load_dataset
10
  import gradio as gr
11
 
12
  if __name__ == "__main__":
13
+
14
  # Evaluation dataset path
15
+ DATA_PATH = "atlasia/Arabic-LID-Leaderboard"
16
+
17
  # All Metrics
18
  metrics = [
19
  'f1_score',
 
45
 
46
  # Load test dataset
47
  test_dataset = load_dataset(DATA_PATH, split='test')
 
 
48
 
49
+ # Supported dialects
50
+ all_target_languages = list(test_dataset.unique("dialect"))
51
+ supported_dialects = all_target_languages + ['All']
52
+ languages_to_display_one_vs_all = all_target_languages # everything except All
53
+
54
  with gr.Blocks() as app:
55
  base_path = os.path.dirname(__file__)
56
  local_image_path = os.path.join(base_path, 'open_arabic_lid_arena.png')
 
104
  A kind of one-vs-all approach for evaluating LID models across multiple arabic dialects.
105
  Computes the `false_positive_rate` of different models for a given target language.
106
  This should help you understand how well a model can identify a specific dialect by
107
+ showing **how often it misclassifies other dialects as the target dialect**.
108
  """
109
  )
110
 
111
+ with gr.Row():
112
+ with gr.Column(scale=1):
113
+ gr.Markdown("### Select your target language")
114
+ target_language_selector = gr.Dropdown(
115
+ choices=languages_to_display_one_vs_all,
116
+ value='Morocco', # Default to Morocco of course
117
+ label="Target Language"
118
+ )
119
+
120
+ with gr.Column(scale=2):
121
+ gr.Markdown("### Select languages to compare to")
122
+ languages_checkboxes = gr.CheckboxGroup(
123
+ choices=languages_to_display_one_vs_all,
124
+ value=default_languages,
125
+ label="Languages"
126
+ )
127
 
128
  with gr.Row():
129
  binary_leaderboard_table = gr.DataFrame(
 
141
  use_mapping = gr.Checkbox(label="Does not map to country")
142
  eval_button = gr.Button("Evaluate", value=False) # Initially disabled
143
 
144
+ # Status message area
145
+ status_message = gr.Markdown(value="")
146
+ def update_status_message():
147
+ return "### **⚠️Evaluating... Please wait...**"
148
+
149
+ eval_button.click(update_status_message, outputs=[status_message])
150
+
151
+ eval_button.click(handle_evaluation, inputs=[model_path, model_path_bin, use_mapping], outputs=[leaderboard_table, status_message])
152
 
153
  with gr.Tab("Upload your results"):
154
 
 
173
 
174
  ```
175
  """
176
+
177
  gr.Markdown("## Upload your results to the leaderboard 🚀")
178
  gr.Markdown("### Submission guidelines: Run the test dataset on your model and save the results in a CSV file. Bellow a code snippet to help you with that.")
179
+ gr.Markdown("### Nota Bene: The One-vs-All leaderboard evaluation is currently unavailable with the csv upload but will be implemented soon. Stay tuned!")
180
  gr.Markdown(code_snippet)
181
 
182
  uploaded_model_name = gr.Textbox(label="Model name", placeholder='Your model/team name')
 
199
 
200
  # Update binary table when any input changes
201
  target_language_selector.change(
202
+ update_leaderboard_one_vs_all,
203
+ inputs=[target_language_selector, languages_checkboxes],
204
+ outputs=[binary_leaderboard_table, languages_checkboxes]
205
  )
206
 
207
  languages_checkboxes.change(
208
+ update_leaderboard_one_vs_all,
209
+ inputs=[target_language_selector, languages_checkboxes],
210
+ outputs=[binary_leaderboard_table, languages_checkboxes]
211
  )
212
 
213
  # Define load event to run at startup
214
+ app.load(
215
+ update_leaderboard_one_vs_all,
216
+ inputs=[target_language_selector, languages_checkboxes],
217
+ outputs=[binary_leaderboard_table, languages_checkboxes]
218
+ )
219
+
220
  app.load(
221
  update_leaderboard_multilingual,
222
  inputs=[country_selector, metric_checkboxes],
darija_leaderboard_multilingual.json CHANGED
@@ -1,1378 +1,3 @@
1
  [
2
- {
3
- "MSA": {
4
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
5
- "f1_score": 0.9749,
6
- "precision": 0.9908,
7
- "recall": 0.9594,
8
- "specificity": 0.9942,
9
- "false_positive_rate": 0.0058,
10
- "false_negative_rate": 0.0406,
11
- "negative_predictive_value": 0.974,
12
- "n_test_samples": 54390
13
- },
14
- "cis-lmu/glotlid/model.bin": {
15
- "f1_score": 0.9554,
16
- "precision": 0.9252,
17
- "recall": 0.9876,
18
- "specificity": 0.9478,
19
- "false_positive_rate": 0.0522,
20
- "false_negative_rate": 0.0124,
21
- "negative_predictive_value": 0.9915,
22
- "n_test_samples": 54390
23
- },
24
- "laurievb/OpenLID/model.bin": {
25
- "f1_score": 0.9264,
26
- "precision": 0.9359,
27
- "recall": 0.9172,
28
- "specificity": 0.9589,
29
- "false_positive_rate": 0.0411,
30
- "false_negative_rate": 0.0828,
31
- "negative_predictive_value": 0.9465,
32
- "n_test_samples": 54390
33
- },
34
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
35
- "f1_score": 0.0,
36
- "precision": 0.0,
37
- "recall": 0.0,
38
- "specificity": 1.0,
39
- "false_positive_rate": 0.0,
40
- "false_negative_rate": 1.0,
41
- "negative_predictive_value": 0.6047,
42
- "n_test_samples": 54390
43
- }
44
- }
45
- },
46
- {
47
- "Pakistan": {
48
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
49
- "f1_score": 0.9963,
50
- "precision": 0.9999,
51
- "recall": 0.9927,
52
- "specificity": 0.9999,
53
- "false_positive_rate": 0.0001,
54
- "false_negative_rate": 0.0073,
55
- "negative_predictive_value": 0.9958,
56
- "n_test_samples": 50000
57
- },
58
- "cis-lmu/glotlid/model.bin": {
59
- "f1_score": 0.999,
60
- "precision": 0.9989,
61
- "recall": 0.9991,
62
- "specificity": 0.9994,
63
- "false_positive_rate": 0.0006,
64
- "false_negative_rate": 0.0009,
65
- "negative_predictive_value": 0.9995,
66
- "n_test_samples": 50000
67
- },
68
- "laurievb/OpenLID/model.bin": {
69
- "f1_score": 0.9927,
70
- "precision": 0.9928,
71
- "recall": 0.9925,
72
- "specificity": 0.9959,
73
- "false_positive_rate": 0.0041,
74
- "false_negative_rate": 0.0075,
75
- "negative_predictive_value": 0.9957,
76
- "n_test_samples": 50000
77
- },
78
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
79
- "f1_score": 0.0,
80
- "precision": 0.0,
81
- "recall": 0.0,
82
- "specificity": 1.0,
83
- "false_positive_rate": 0.0,
84
- "false_negative_rate": 1.0,
85
- "negative_predictive_value": 0.6366,
86
- "n_test_samples": 50000
87
- }
88
- }
89
- },
90
- {
91
- "Morocco": {
92
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
93
- "f1_score": 0.8789,
94
- "precision": 0.8624,
95
- "recall": 0.8961,
96
- "specificity": 0.9771,
97
- "false_positive_rate": 0.0229,
98
- "false_negative_rate": 0.1039,
99
- "negative_predictive_value": 0.9832,
100
- "n_test_samples": 19005
101
- },
102
- "cis-lmu/glotlid/model.bin": {
103
- "f1_score": 0.7172,
104
- "precision": 0.9038,
105
- "recall": 0.5945,
106
- "specificity": 0.9899,
107
- "false_positive_rate": 0.0101,
108
- "false_negative_rate": 0.4055,
109
- "negative_predictive_value": 0.9384,
110
- "n_test_samples": 19005
111
- },
112
- "laurievb/OpenLID/model.bin": {
113
- "f1_score": 0.6146,
114
- "precision": 0.7279,
115
- "recall": 0.5318,
116
- "specificity": 0.9681,
117
- "false_positive_rate": 0.0319,
118
- "false_negative_rate": 0.4682,
119
- "negative_predictive_value": 0.9281,
120
- "n_test_samples": 19005
121
- },
122
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
123
- "f1_score": 0.8986,
124
- "precision": 0.9166,
125
- "recall": 0.8812,
126
- "specificity": 0.9871,
127
- "false_positive_rate": 0.0129,
128
- "false_negative_rate": 0.1188,
129
- "negative_predictive_value": 0.9811,
130
- "n_test_samples": 19005
131
- }
132
- }
133
- },
134
- {
135
- "Egypt": {
136
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
137
- "f1_score": 0.5929,
138
- "precision": 0.5835,
139
- "recall": 0.6025,
140
- "specificity": 0.993,
141
- "false_positive_rate": 0.007,
142
- "false_negative_rate": 0.3975,
143
- "negative_predictive_value": 0.9935,
144
- "n_test_samples": 2204
145
- },
146
- "cis-lmu/glotlid/model.bin": {
147
- "f1_score": 0.6028,
148
- "precision": 0.4837,
149
- "recall": 0.7999,
150
- "specificity": 0.9861,
151
- "false_positive_rate": 0.0139,
152
- "false_negative_rate": 0.2001,
153
- "negative_predictive_value": 0.9967,
154
- "n_test_samples": 2204
155
- },
156
- "laurievb/OpenLID/model.bin": {
157
- "f1_score": 0.4094,
158
- "precision": 0.2663,
159
- "recall": 0.8843,
160
- "specificity": 0.9603,
161
- "false_positive_rate": 0.0397,
162
- "false_negative_rate": 0.1157,
163
- "negative_predictive_value": 0.998,
164
- "n_test_samples": 2204
165
- },
166
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
167
- "f1_score": 0.0,
168
- "precision": 0.0,
169
- "recall": 0.0,
170
- "specificity": 1.0,
171
- "false_positive_rate": 0.0,
172
- "false_negative_rate": 1.0,
173
- "negative_predictive_value": 0.984,
174
- "n_test_samples": 2204
175
- }
176
- }
177
- },
178
- {
179
- "Palestine": {
180
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
181
- "f1_score": 0.2511,
182
- "precision": 0.3352,
183
- "recall": 0.2007,
184
- "specificity": 0.9957,
185
- "false_positive_rate": 0.0043,
186
- "false_negative_rate": 0.7993,
187
- "negative_predictive_value": 0.9914,
188
- "n_test_samples": 1465
189
- },
190
- "cis-lmu/glotlid/model.bin": {
191
- "f1_score": 0.0,
192
- "precision": 0.0,
193
- "recall": 0.0,
194
- "specificity": 1.0,
195
- "false_positive_rate": 0.0,
196
- "false_negative_rate": 1.0,
197
- "negative_predictive_value": 0.9894,
198
- "n_test_samples": 1465
199
- },
200
- "laurievb/OpenLID/model.bin": {
201
- "f1_score": 0.0,
202
- "precision": 0.0,
203
- "recall": 0.0,
204
- "specificity": 1.0,
205
- "false_positive_rate": 0.0,
206
- "false_negative_rate": 1.0,
207
- "negative_predictive_value": 0.9894,
208
- "n_test_samples": 1465
209
- },
210
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
211
- "f1_score": 0.0,
212
- "precision": 0.0,
213
- "recall": 0.0,
214
- "specificity": 1.0,
215
- "false_positive_rate": 0.0,
216
- "false_negative_rate": 1.0,
217
- "negative_predictive_value": 0.9894,
218
- "n_test_samples": 1465
219
- }
220
- }
221
- },
222
- {
223
- "Levantine": {
224
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
225
- "f1_score": 0.1723,
226
- "precision": 0.1386,
227
- "recall": 0.2275,
228
- "specificity": 0.9854,
229
- "false_positive_rate": 0.0146,
230
- "false_negative_rate": 0.7725,
231
- "negative_predictive_value": 0.992,
232
- "n_test_samples": 1402
233
- },
234
- "cis-lmu/glotlid/model.bin": {
235
- "f1_score": 0.1171,
236
- "precision": 0.073,
237
- "recall": 0.2953,
238
- "specificity": 0.9614,
239
- "false_positive_rate": 0.0386,
240
- "false_negative_rate": 0.7047,
241
- "negative_predictive_value": 0.9925,
242
- "n_test_samples": 1402
243
- },
244
- "laurievb/OpenLID/model.bin": {
245
- "f1_score": 0.1029,
246
- "precision": 0.0645,
247
- "recall": 0.2532,
248
- "specificity": 0.9622,
249
- "false_positive_rate": 0.0378,
250
- "false_negative_rate": 0.7468,
251
- "negative_predictive_value": 0.9921,
252
- "n_test_samples": 1402
253
- },
254
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
255
- "f1_score": 0.0,
256
- "precision": 0.0,
257
- "recall": 0.0,
258
- "specificity": 1.0,
259
- "false_positive_rate": 0.0,
260
- "false_negative_rate": 1.0,
261
- "negative_predictive_value": 0.9898,
262
- "n_test_samples": 1402
263
- }
264
- }
265
- },
266
- {
267
- "Saudi": {
268
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
269
- "f1_score": 0.4382,
270
- "precision": 0.3474,
271
- "recall": 0.5932,
272
- "specificity": 0.9887,
273
- "false_positive_rate": 0.0113,
274
- "false_negative_rate": 0.4068,
275
- "negative_predictive_value": 0.9958,
276
- "n_test_samples": 1384
277
- },
278
- "cis-lmu/glotlid/model.bin": {
279
- "f1_score": 0.3893,
280
- "precision": 0.2692,
281
- "recall": 0.703,
282
- "specificity": 0.9806,
283
- "false_positive_rate": 0.0194,
284
- "false_negative_rate": 0.297,
285
- "negative_predictive_value": 0.9969,
286
- "n_test_samples": 1384
287
- },
288
- "laurievb/OpenLID/model.bin": {
289
- "f1_score": 0.3436,
290
- "precision": 0.2381,
291
- "recall": 0.6171,
292
- "specificity": 0.9799,
293
- "false_positive_rate": 0.0201,
294
- "false_negative_rate": 0.3829,
295
- "negative_predictive_value": 0.996,
296
- "n_test_samples": 1384
297
- },
298
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
299
- "f1_score": 0.0,
300
- "precision": 0.0,
301
- "recall": 0.0,
302
- "specificity": 1.0,
303
- "false_positive_rate": 0.0,
304
- "false_negative_rate": 1.0,
305
- "negative_predictive_value": 0.9899,
306
- "n_test_samples": 1384
307
- }
308
- }
309
- },
310
- {
311
- "Jordan": {
312
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
313
- "f1_score": 0.2726,
314
- "precision": 0.4203,
315
- "recall": 0.2017,
316
- "specificity": 0.9972,
317
- "false_positive_rate": 0.0028,
318
- "false_negative_rate": 0.7983,
319
- "negative_predictive_value": 0.992,
320
- "n_test_samples": 1373
321
- },
322
- "cis-lmu/glotlid/model.bin": {
323
- "f1_score": 0.0,
324
- "precision": 0.0,
325
- "recall": 0.0,
326
- "specificity": 1.0,
327
- "false_positive_rate": 0.0,
328
- "false_negative_rate": 1.0,
329
- "negative_predictive_value": 0.99,
330
- "n_test_samples": 1373
331
- },
332
- "laurievb/OpenLID/model.bin": {
333
- "f1_score": 0.0,
334
- "precision": 0.0,
335
- "recall": 0.0,
336
- "specificity": 1.0,
337
- "false_positive_rate": 0.0,
338
- "false_negative_rate": 1.0,
339
- "negative_predictive_value": 0.99,
340
- "n_test_samples": 1373
341
- },
342
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
343
- "f1_score": 0.0,
344
- "precision": 0.0,
345
- "recall": 0.0,
346
- "specificity": 1.0,
347
- "false_positive_rate": 0.0,
348
- "false_negative_rate": 1.0,
349
- "negative_predictive_value": 0.99,
350
- "n_test_samples": 1373
351
- }
352
- }
353
- },
354
- {
355
- "Algeria": {
356
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
357
- "f1_score": 0.5221,
358
- "precision": 0.5849,
359
- "recall": 0.4714,
360
- "specificity": 0.9974,
361
- "false_positive_rate": 0.0026,
362
- "false_negative_rate": 0.5286,
363
- "negative_predictive_value": 0.9959,
364
- "n_test_samples": 1067
365
- },
366
- "cis-lmu/glotlid/model.bin": {
367
- "f1_score": 0.1235,
368
- "precision": 0.2751,
369
- "recall": 0.0797,
370
- "specificity": 0.9984,
371
- "false_positive_rate": 0.0016,
372
- "false_negative_rate": 0.9203,
373
- "negative_predictive_value": 0.9928,
374
- "n_test_samples": 1067
375
- },
376
- "laurievb/OpenLID/model.bin": {
377
- "f1_score": 0.0,
378
- "precision": 0.0,
379
- "recall": 0.0,
380
- "specificity": 1.0,
381
- "false_positive_rate": 0.0,
382
- "false_negative_rate": 1.0,
383
- "negative_predictive_value": 0.9922,
384
- "n_test_samples": 1067
385
- },
386
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
387
- "f1_score": 0.0,
388
- "precision": 0.0,
389
- "recall": 0.0,
390
- "specificity": 1.0,
391
- "false_positive_rate": 0.0,
392
- "false_negative_rate": 1.0,
393
- "negative_predictive_value": 0.9922,
394
- "n_test_samples": 1067
395
- }
396
- }
397
- },
398
- {
399
- "UAE": {
400
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
401
- "f1_score": 0.3452,
402
- "precision": 0.4207,
403
- "recall": 0.2926,
404
- "specificity": 0.9971,
405
- "false_positive_rate": 0.0029,
406
- "false_negative_rate": 0.7074,
407
- "negative_predictive_value": 0.9948,
408
- "n_test_samples": 998
409
- },
410
- "cis-lmu/glotlid/model.bin": {
411
- "f1_score": 0.0,
412
- "precision": 0.0,
413
- "recall": 0.0,
414
- "specificity": 1.0,
415
- "false_positive_rate": 0.0,
416
- "false_negative_rate": 1.0,
417
- "negative_predictive_value": 0.9927,
418
- "n_test_samples": 998
419
- },
420
- "laurievb/OpenLID/model.bin": {
421
- "f1_score": 0.0,
422
- "precision": 0.0,
423
- "recall": 0.0,
424
- "specificity": 1.0,
425
- "false_positive_rate": 0.0,
426
- "false_negative_rate": 1.0,
427
- "negative_predictive_value": 0.9927,
428
- "n_test_samples": 998
429
- },
430
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
431
- "f1_score": 0.0,
432
- "precision": 0.0,
433
- "recall": 0.0,
434
- "specificity": 1.0,
435
- "false_positive_rate": 0.0,
436
- "false_negative_rate": 1.0,
437
- "negative_predictive_value": 0.9927,
438
- "n_test_samples": 998
439
- }
440
- }
441
- },
442
- {
443
- "Mauritania": {
444
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
445
- "f1_score": 0.5343,
446
- "precision": 0.944,
447
- "recall": 0.3726,
448
- "specificity": 0.9998,
449
- "false_positive_rate": 0.0002,
450
- "false_negative_rate": 0.6274,
451
- "negative_predictive_value": 0.9957,
452
- "n_test_samples": 950
453
- },
454
- "cis-lmu/glotlid/model.bin": {
455
- "f1_score": 0.0,
456
- "precision": 0.0,
457
- "recall": 0.0,
458
- "specificity": 1.0,
459
- "false_positive_rate": 0.0,
460
- "false_negative_rate": 1.0,
461
- "negative_predictive_value": 0.9931,
462
- "n_test_samples": 950
463
- },
464
- "laurievb/OpenLID/model.bin": {
465
- "f1_score": 0.0,
466
- "precision": 0.0,
467
- "recall": 0.0,
468
- "specificity": 1.0,
469
- "false_positive_rate": 0.0,
470
- "false_negative_rate": 1.0,
471
- "negative_predictive_value": 0.9931,
472
- "n_test_samples": 950
473
- },
474
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
475
- "f1_score": 0.0,
476
- "precision": 0.0,
477
- "recall": 0.0,
478
- "specificity": 1.0,
479
- "false_positive_rate": 0.0,
480
- "false_negative_rate": 1.0,
481
- "negative_predictive_value": 0.9931,
482
- "n_test_samples": 950
483
- }
484
- }
485
- },
486
- {
487
- "Yemen": {
488
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
489
- "f1_score": 0.223,
490
- "precision": 0.5619,
491
- "recall": 0.1391,
492
- "specificity": 0.9993,
493
- "false_positive_rate": 0.0007,
494
- "false_negative_rate": 0.8609,
495
- "negative_predictive_value": 0.9943,
496
- "n_test_samples": 913
497
- },
498
- "cis-lmu/glotlid/model.bin": {
499
- "f1_score": 0.0,
500
- "precision": 0.0,
501
- "recall": 0.0,
502
- "specificity": 1.0,
503
- "false_positive_rate": 0.0,
504
- "false_negative_rate": 1.0,
505
- "negative_predictive_value": 0.9934,
506
- "n_test_samples": 913
507
- },
508
- "laurievb/OpenLID/model.bin": {
509
- "f1_score": 0.0,
510
- "precision": 0.0,
511
- "recall": 0.0,
512
- "specificity": 1.0,
513
- "false_positive_rate": 0.0,
514
- "false_negative_rate": 1.0,
515
- "negative_predictive_value": 0.9934,
516
- "n_test_samples": 913
517
- },
518
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
519
- "f1_score": 0.0,
520
- "precision": 0.0,
521
- "recall": 0.0,
522
- "specificity": 1.0,
523
- "false_positive_rate": 0.0,
524
- "false_negative_rate": 1.0,
525
- "negative_predictive_value": 0.9934,
526
- "n_test_samples": 913
527
- }
528
- }
529
- },
530
- {
531
- "Syria": {
532
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
533
- "f1_score": 0.1965,
534
- "precision": 0.2126,
535
- "recall": 0.1827,
536
- "specificity": 0.9971,
537
- "false_positive_rate": 0.0029,
538
- "false_negative_rate": 0.8173,
539
- "negative_predictive_value": 0.9965,
540
- "n_test_samples": 591
541
- },
542
- "cis-lmu/glotlid/model.bin": {
543
- "f1_score": 0.0,
544
- "precision": 0.0,
545
- "recall": 0.0,
546
- "specificity": 1.0,
547
- "false_positive_rate": 0.0,
548
- "false_negative_rate": 1.0,
549
- "negative_predictive_value": 0.9957,
550
- "n_test_samples": 591
551
- },
552
- "laurievb/OpenLID/model.bin": {
553
- "f1_score": 0.0,
554
- "precision": 0.0,
555
- "recall": 0.0,
556
- "specificity": 1.0,
557
- "false_positive_rate": 0.0,
558
- "false_negative_rate": 1.0,
559
- "negative_predictive_value": 0.9957,
560
- "n_test_samples": 591
561
- },
562
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
563
- "f1_score": 0.0,
564
- "precision": 0.0,
565
- "recall": 0.0,
566
- "specificity": 1.0,
567
- "false_positive_rate": 0.0,
568
- "false_negative_rate": 1.0,
569
- "negative_predictive_value": 0.9957,
570
- "n_test_samples": 591
571
- }
572
- }
573
- },
574
- {
575
- "Lebanon": {
576
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
577
- "f1_score": 0.2699,
578
- "precision": 0.2133,
579
- "recall": 0.3675,
580
- "specificity": 0.9967,
581
- "false_positive_rate": 0.0033,
582
- "false_negative_rate": 0.6325,
583
- "negative_predictive_value": 0.9985,
584
- "n_test_samples": 332
585
- },
586
- "cis-lmu/glotlid/model.bin": {
587
- "f1_score": 0.0,
588
- "precision": 0.0,
589
- "recall": 0.0,
590
- "specificity": 1.0,
591
- "false_positive_rate": 0.0,
592
- "false_negative_rate": 1.0,
593
- "negative_predictive_value": 0.9976,
594
- "n_test_samples": 332
595
- },
596
- "laurievb/OpenLID/model.bin": {
597
- "f1_score": 0.0,
598
- "precision": 0.0,
599
- "recall": 0.0,
600
- "specificity": 1.0,
601
- "false_positive_rate": 0.0,
602
- "false_negative_rate": 1.0,
603
- "negative_predictive_value": 0.9976,
604
- "n_test_samples": 332
605
- },
606
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
607
- "f1_score": 0.0,
608
- "precision": 0.0,
609
- "recall": 0.0,
610
- "specificity": 1.0,
611
- "false_positive_rate": 0.0,
612
- "false_negative_rate": 1.0,
613
- "negative_predictive_value": 0.9976,
614
- "n_test_samples": 332
615
- }
616
- }
617
- },
618
- {
619
- "Qatar": {
620
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
621
- "f1_score": 0.3145,
622
- "precision": 0.2315,
623
- "recall": 0.4905,
624
- "specificity": 0.9975,
625
- "false_positive_rate": 0.0025,
626
- "false_negative_rate": 0.5095,
627
- "negative_predictive_value": 0.9992,
628
- "n_test_samples": 210
629
- },
630
- "cis-lmu/glotlid/model.bin": {
631
- "f1_score": 0.0,
632
- "precision": 0.0,
633
- "recall": 0.0,
634
- "specificity": 1.0,
635
- "false_positive_rate": 0.0,
636
- "false_negative_rate": 1.0,
637
- "negative_predictive_value": 0.9985,
638
- "n_test_samples": 210
639
- },
640
- "laurievb/OpenLID/model.bin": {
641
- "f1_score": 0.0,
642
- "precision": 0.0,
643
- "recall": 0.0,
644
- "specificity": 1.0,
645
- "false_positive_rate": 0.0,
646
- "false_negative_rate": 1.0,
647
- "negative_predictive_value": 0.9985,
648
- "n_test_samples": 210
649
- },
650
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
651
- "f1_score": 0.0,
652
- "precision": 0.0,
653
- "recall": 0.0,
654
- "specificity": 1.0,
655
- "false_positive_rate": 0.0,
656
- "false_negative_rate": 1.0,
657
- "negative_predictive_value": 0.9985,
658
- "n_test_samples": 210
659
- }
660
- }
661
- },
662
- {
663
- "Iraq": {
664
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
665
- "f1_score": 0.4075,
666
- "precision": 0.3884,
667
- "recall": 0.4286,
668
- "specificity": 0.999,
669
- "false_positive_rate": 0.001,
670
- "false_negative_rate": 0.5714,
671
- "negative_predictive_value": 0.9992,
672
- "n_test_samples": 203
673
- },
674
- "cis-lmu/glotlid/model.bin": {
675
- "f1_score": 0.0098,
676
- "precision": 1.0,
677
- "recall": 0.0049,
678
- "specificity": 1.0,
679
- "false_positive_rate": 0.0,
680
- "false_negative_rate": 0.9951,
681
- "negative_predictive_value": 0.9985,
682
- "n_test_samples": 203
683
- },
684
- "laurievb/OpenLID/model.bin": {
685
- "f1_score": 0.0,
686
- "precision": 0.0,
687
- "recall": 0.0,
688
- "specificity": 1.0,
689
- "false_positive_rate": 0.0,
690
- "false_negative_rate": 1.0,
691
- "negative_predictive_value": 0.9985,
692
- "n_test_samples": 203
693
- },
694
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
695
- "f1_score": 0.0,
696
- "precision": 0.0,
697
- "recall": 0.0,
698
- "specificity": 1.0,
699
- "false_positive_rate": 0.0,
700
- "false_negative_rate": 1.0,
701
- "negative_predictive_value": 0.9985,
702
- "n_test_samples": 203
703
- }
704
- }
705
- },
706
- {
707
- "Libya": {
708
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
709
- "f1_score": 0.357,
710
- "precision": 0.2621,
711
- "recall": 0.5596,
712
- "specificity": 0.9978,
713
- "false_positive_rate": 0.0022,
714
- "false_negative_rate": 0.4404,
715
- "negative_predictive_value": 0.9994,
716
- "n_test_samples": 193
717
- },
718
- "cis-lmu/glotlid/model.bin": {
719
- "f1_score": 0.0,
720
- "precision": 0.0,
721
- "recall": 0.0,
722
- "specificity": 1.0,
723
- "false_positive_rate": 0.0,
724
- "false_negative_rate": 1.0,
725
- "negative_predictive_value": 0.9986,
726
- "n_test_samples": 193
727
- },
728
- "laurievb/OpenLID/model.bin": {
729
- "f1_score": 0.0,
730
- "precision": 0.0,
731
- "recall": 0.0,
732
- "specificity": 1.0,
733
- "false_positive_rate": 0.0,
734
- "false_negative_rate": 1.0,
735
- "negative_predictive_value": 0.9986,
736
- "n_test_samples": 193
737
- },
738
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
739
- "f1_score": 0.0,
740
- "precision": 0.0,
741
- "recall": 0.0,
742
- "specificity": 1.0,
743
- "false_positive_rate": 0.0,
744
- "false_negative_rate": 1.0,
745
- "negative_predictive_value": 0.9986,
746
- "n_test_samples": 193
747
- }
748
- }
749
- },
750
- {
751
- "Tunisia": {
752
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
753
- "f1_score": 0.1851,
754
- "precision": 0.1089,
755
- "recall": 0.6158,
756
- "specificity": 0.993,
757
- "false_positive_rate": 0.007,
758
- "false_negative_rate": 0.3842,
759
- "negative_predictive_value": 0.9995,
760
- "n_test_samples": 190
761
- },
762
- "cis-lmu/glotlid/model.bin": {
763
- "f1_score": 0.1143,
764
- "precision": 0.0624,
765
- "recall": 0.6737,
766
- "specificity": 0.986,
767
- "false_positive_rate": 0.014,
768
- "false_negative_rate": 0.3263,
769
- "negative_predictive_value": 0.9995,
770
- "n_test_samples": 190
771
- },
772
- "laurievb/OpenLID/model.bin": {
773
- "f1_score": 0.1045,
774
- "precision": 0.0564,
775
- "recall": 0.7053,
776
- "specificity": 0.9837,
777
- "false_positive_rate": 0.0163,
778
- "false_negative_rate": 0.2947,
779
- "negative_predictive_value": 0.9996,
780
- "n_test_samples": 190
781
- },
782
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
783
- "f1_score": 0.0,
784
- "precision": 0.0,
785
- "recall": 0.0,
786
- "specificity": 1.0,
787
- "false_positive_rate": 0.0,
788
- "false_negative_rate": 1.0,
789
- "negative_predictive_value": 0.9986,
790
- "n_test_samples": 190
791
- }
792
- }
793
- },
794
- {
795
- "Oman": {
796
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
797
- "f1_score": 0.1969,
798
- "precision": 0.1391,
799
- "recall": 0.3368,
800
- "specificity": 0.9971,
801
- "false_positive_rate": 0.0029,
802
- "false_negative_rate": 0.6632,
803
- "negative_predictive_value": 0.9991,
804
- "n_test_samples": 190
805
- },
806
- "cis-lmu/glotlid/model.bin": {
807
- "f1_score": 0.0,
808
- "precision": 0.0,
809
- "recall": 0.0,
810
- "specificity": 1.0,
811
- "false_positive_rate": 0.0,
812
- "false_negative_rate": 1.0,
813
- "negative_predictive_value": 0.9986,
814
- "n_test_samples": 190
815
- },
816
- "laurievb/OpenLID/model.bin": {
817
- "f1_score": 0.0,
818
- "precision": 0.0,
819
- "recall": 0.0,
820
- "specificity": 1.0,
821
- "false_positive_rate": 0.0,
822
- "false_negative_rate": 1.0,
823
- "negative_predictive_value": 0.9986,
824
- "n_test_samples": 190
825
- },
826
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
827
- "f1_score": 0.0,
828
- "precision": 0.0,
829
- "recall": 0.0,
830
- "specificity": 1.0,
831
- "false_positive_rate": 0.0,
832
- "false_negative_rate": 1.0,
833
- "negative_predictive_value": 0.9986,
834
- "n_test_samples": 190
835
- }
836
- }
837
- },
838
- {
839
- "Kuwait": {
840
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
841
- "f1_score": 0.29,
842
- "precision": 0.2091,
843
- "recall": 0.4728,
844
- "specificity": 0.9976,
845
- "false_positive_rate": 0.0024,
846
- "false_negative_rate": 0.5272,
847
- "negative_predictive_value": 0.9993,
848
- "n_test_samples": 184
849
- },
850
- "cis-lmu/glotlid/model.bin": {
851
- "f1_score": 0.0,
852
- "precision": 0.0,
853
- "recall": 0.0,
854
- "specificity": 1.0,
855
- "false_positive_rate": 0.0,
856
- "false_negative_rate": 1.0,
857
- "negative_predictive_value": 0.9987,
858
- "n_test_samples": 184
859
- },
860
- "laurievb/OpenLID/model.bin": {
861
- "f1_score": 0.0,
862
- "precision": 0.0,
863
- "recall": 0.0,
864
- "specificity": 1.0,
865
- "false_positive_rate": 0.0,
866
- "false_negative_rate": 1.0,
867
- "negative_predictive_value": 0.9987,
868
- "n_test_samples": 184
869
- },
870
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
871
- "f1_score": 0.0,
872
- "precision": 0.0,
873
- "recall": 0.0,
874
- "specificity": 1.0,
875
- "false_positive_rate": 0.0,
876
- "false_negative_rate": 1.0,
877
- "negative_predictive_value": 0.9987,
878
- "n_test_samples": 184
879
- }
880
- }
881
- },
882
- {
883
- "Bahrain": {
884
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
885
- "f1_score": 0.2045,
886
- "precision": 0.2069,
887
- "recall": 0.2022,
888
- "specificity": 0.999,
889
- "false_positive_rate": 0.001,
890
- "false_negative_rate": 0.7978,
891
- "negative_predictive_value": 0.999,
892
- "n_test_samples": 178
893
- },
894
- "cis-lmu/glotlid/model.bin": {
895
- "f1_score": 0.0,
896
- "precision": 0.0,
897
- "recall": 0.0,
898
- "specificity": 1.0,
899
- "false_positive_rate": 0.0,
900
- "false_negative_rate": 1.0,
901
- "negative_predictive_value": 0.9987,
902
- "n_test_samples": 178
903
- },
904
- "laurievb/OpenLID/model.bin": {
905
- "f1_score": 0.0,
906
- "precision": 0.0,
907
- "recall": 0.0,
908
- "specificity": 1.0,
909
- "false_positive_rate": 0.0,
910
- "false_negative_rate": 1.0,
911
- "negative_predictive_value": 0.9987,
912
- "n_test_samples": 178
913
- },
914
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
915
- "f1_score": 0.0,
916
- "precision": 0.0,
917
- "recall": 0.0,
918
- "specificity": 1.0,
919
- "false_positive_rate": 0.0,
920
- "false_negative_rate": 1.0,
921
- "negative_predictive_value": 0.9987,
922
- "n_test_samples": 178
923
- }
924
- }
925
- },
926
- {
927
- "Sudan": {
928
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
929
- "f1_score": 0.395,
930
- "precision": 0.3198,
931
- "recall": 0.5163,
932
- "specificity": 0.9988,
933
- "false_positive_rate": 0.0012,
934
- "false_negative_rate": 0.4837,
935
- "negative_predictive_value": 0.9995,
936
- "n_test_samples": 153
937
- },
938
- "cis-lmu/glotlid/model.bin": {
939
- "f1_score": 0.0,
940
- "precision": 0.0,
941
- "recall": 0.0,
942
- "specificity": 1.0,
943
- "false_positive_rate": 0.0,
944
- "false_negative_rate": 1.0,
945
- "negative_predictive_value": 0.9989,
946
- "n_test_samples": 153
947
- },
948
- "laurievb/OpenLID/model.bin": {
949
- "f1_score": 0.0,
950
- "precision": 0.0,
951
- "recall": 0.0,
952
- "specificity": 1.0,
953
- "false_positive_rate": 0.0,
954
- "false_negative_rate": 1.0,
955
- "negative_predictive_value": 0.9989,
956
- "n_test_samples": 153
957
- },
958
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_binary_v3_1fpr.bin": {
959
- "f1_score": 0.0,
960
- "precision": 0.0,
961
- "recall": 0.0,
962
- "specificity": 1.0,
963
- "false_positive_rate": 0.0,
964
- "false_negative_rate": 1.0,
965
- "negative_predictive_value": 0.9989,
966
- "n_test_samples": 153
967
- }
968
- }
969
- },
970
- {
971
- "Turkey": {
972
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
973
- "f1_score": 0.0,
974
- "precision": 0.0,
975
- "recall": 0.0,
976
- "specificity": 0.9999,
977
- "false_positive_rate": 0.0001,
978
- "false_negative_rate": 0.0,
979
- "negative_predictive_value": 1.0,
980
- "n_test_samples": 0
981
- },
982
- "cis-lmu/glotlid/model.bin": {
983
- "f1_score": 0.0,
984
- "precision": 0.0,
985
- "recall": 0.0,
986
- "specificity": 1.0,
987
- "false_positive_rate": 0.0,
988
- "false_negative_rate": 0.0,
989
- "negative_predictive_value": 1.0,
990
- "n_test_samples": 0
991
- }
992
- }
993
- },
994
- {
995
- "Turkmenistan": {
996
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
997
- "f1_score": 0.0,
998
- "precision": 0.0,
999
- "recall": 0.0,
1000
- "specificity": 1.0,
1001
- "false_positive_rate": 0.0,
1002
- "false_negative_rate": 0.0,
1003
- "negative_predictive_value": 1.0,
1004
- "n_test_samples": 0
1005
- }
1006
- }
1007
- },
1008
- {
1009
- "Uzbekistan": {
1010
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
1011
- "f1_score": 0.0,
1012
- "precision": 0.0,
1013
- "recall": 0.0,
1014
- "specificity": 0.9999,
1015
- "false_positive_rate": 0.0001,
1016
- "false_negative_rate": 0.0,
1017
- "negative_predictive_value": 1.0,
1018
- "n_test_samples": 0
1019
- }
1020
- }
1021
- },
1022
- {
1023
- "Acehnese": {
1024
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
1025
- "f1_score": 0.0,
1026
- "precision": 0.0,
1027
- "recall": 0.0,
1028
- "specificity": 1.0,
1029
- "false_positive_rate": 0.0,
1030
- "false_negative_rate": 0.0,
1031
- "negative_predictive_value": 1.0,
1032
- "n_test_samples": 0
1033
- },
1034
- "laurievb/OpenLID/model.bin": {
1035
- "f1_score": 0.0,
1036
- "precision": 0.0,
1037
- "recall": 0.0,
1038
- "specificity": 1.0,
1039
- "false_positive_rate": 0.0,
1040
- "false_negative_rate": 0.0,
1041
- "negative_predictive_value": 1.0,
1042
- "n_test_samples": 0
1043
- }
1044
- }
1045
- },
1046
- {
1047
- "Nigeria": {
1048
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
1049
- "f1_score": 0.0,
1050
- "precision": 0.0,
1051
- "recall": 0.0,
1052
- "specificity": 1.0,
1053
- "false_positive_rate": 0.0,
1054
- "false_negative_rate": 0.0,
1055
- "negative_predictive_value": 1.0,
1056
- "n_test_samples": 0
1057
- },
1058
- "cis-lmu/glotlid/model.bin": {
1059
- "f1_score": 0.0,
1060
- "precision": 0.0,
1061
- "recall": 0.0,
1062
- "specificity": 1.0,
1063
- "false_positive_rate": 0.0,
1064
- "false_negative_rate": 0.0,
1065
- "negative_predictive_value": 1.0,
1066
- "n_test_samples": 0
1067
- },
1068
- "laurievb/OpenLID/model.bin": {
1069
- "f1_score": 0.0,
1070
- "precision": 0.0,
1071
- "recall": 0.0,
1072
- "specificity": 1.0,
1073
- "false_positive_rate": 0.0,
1074
- "false_negative_rate": 0.0,
1075
- "negative_predictive_value": 1.0,
1076
- "n_test_samples": 0
1077
- }
1078
- }
1079
- },
1080
- {
1081
- "Mesopotamia": {
1082
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
1083
- "f1_score": 0.0,
1084
- "precision": 0.0,
1085
- "recall": 0.0,
1086
- "specificity": 0.9993,
1087
- "false_positive_rate": 0.0007,
1088
- "false_negative_rate": 0.0,
1089
- "negative_predictive_value": 1.0,
1090
- "n_test_samples": 0
1091
- },
1092
- "cis-lmu/glotlid/model.bin": {
1093
- "f1_score": 0.0,
1094
- "precision": 0.0,
1095
- "recall": 0.0,
1096
- "specificity": 0.9983,
1097
- "false_positive_rate": 0.0017,
1098
- "false_negative_rate": 0.0,
1099
- "negative_predictive_value": 1.0,
1100
- "n_test_samples": 0
1101
- },
1102
- "laurievb/OpenLID/model.bin": {
1103
- "f1_score": 0.0,
1104
- "precision": 0.0,
1105
- "recall": 0.0,
1106
- "specificity": 0.9989,
1107
- "false_positive_rate": 0.0011,
1108
- "false_negative_rate": 0.0,
1109
- "negative_predictive_value": 1.0,
1110
- "n_test_samples": 0
1111
- }
1112
- }
1113
- },
1114
- {
1115
- "Afghanistan": {
1116
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
1117
- "f1_score": 0.0,
1118
- "precision": 0.0,
1119
- "recall": 0.0,
1120
- "specificity": 0.9998,
1121
- "false_positive_rate": 0.0002,
1122
- "false_negative_rate": 0.0,
1123
- "negative_predictive_value": 1.0,
1124
- "n_test_samples": 0
1125
- },
1126
- "cis-lmu/glotlid/model.bin": {
1127
- "f1_score": 0.0,
1128
- "precision": 0.0,
1129
- "recall": 0.0,
1130
- "specificity": 0.9999,
1131
- "false_positive_rate": 0.0001,
1132
- "false_negative_rate": 0.0,
1133
- "negative_predictive_value": 1.0,
1134
- "n_test_samples": 0
1135
- },
1136
- "laurievb/OpenLID/model.bin": {
1137
- "f1_score": 0.0,
1138
- "precision": 0.0,
1139
- "recall": 0.0,
1140
- "specificity": 0.9995,
1141
- "false_positive_rate": 0.0005,
1142
- "false_negative_rate": 0.0,
1143
- "negative_predictive_value": 1.0,
1144
- "n_test_samples": 0
1145
- }
1146
- }
1147
- },
1148
- {
1149
- "Kurdistan": {
1150
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
1151
- "f1_score": 0.0,
1152
- "precision": 0.0,
1153
- "recall": 0.0,
1154
- "specificity": 0.9997,
1155
- "false_positive_rate": 0.0003,
1156
- "false_negative_rate": 0.0,
1157
- "negative_predictive_value": 1.0,
1158
- "n_test_samples": 0
1159
- },
1160
- "cis-lmu/glotlid/model.bin": {
1161
- "f1_score": 0.0,
1162
- "precision": 0.0,
1163
- "recall": 0.0,
1164
- "specificity": 1.0,
1165
- "false_positive_rate": 0.0,
1166
- "false_negative_rate": 0.0,
1167
- "negative_predictive_value": 1.0,
1168
- "n_test_samples": 0
1169
- },
1170
- "laurievb/OpenLID/model.bin": {
1171
- "f1_score": 0.0,
1172
- "precision": 0.0,
1173
- "recall": 0.0,
1174
- "specificity": 0.9999,
1175
- "false_positive_rate": 0.0001,
1176
- "false_negative_rate": 0.0,
1177
- "negative_predictive_value": 1.0,
1178
- "n_test_samples": 0
1179
- }
1180
- }
1181
- },
1182
- {
1183
- "Kashmir": {
1184
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
1185
- "f1_score": 0.0,
1186
- "precision": 0.0,
1187
- "recall": 0.0,
1188
- "specificity": 0.9999,
1189
- "false_positive_rate": 0.0001,
1190
- "false_negative_rate": 0.0,
1191
- "negative_predictive_value": 1.0,
1192
- "n_test_samples": 0
1193
- },
1194
- "laurievb/OpenLID/model.bin": {
1195
- "f1_score": 0.0,
1196
- "precision": 0.0,
1197
- "recall": 0.0,
1198
- "specificity": 0.9993,
1199
- "false_positive_rate": 0.0007,
1200
- "false_negative_rate": 0.0,
1201
- "negative_predictive_value": 1.0,
1202
- "n_test_samples": 0
1203
- }
1204
- }
1205
- },
1206
- {
1207
- "Iran": {
1208
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
1209
- "f1_score": 0.0,
1210
- "precision": 0.0,
1211
- "recall": 0.0,
1212
- "specificity": 0.9994,
1213
- "false_positive_rate": 0.0006,
1214
- "false_negative_rate": 0.0,
1215
- "negative_predictive_value": 1.0,
1216
- "n_test_samples": 0
1217
- },
1218
- "cis-lmu/glotlid/model.bin": {
1219
- "f1_score": 0.0,
1220
- "precision": 0.0,
1221
- "recall": 0.0,
1222
- "specificity": 0.9999,
1223
- "false_positive_rate": 0.0001,
1224
- "false_negative_rate": 0.0,
1225
- "negative_predictive_value": 1.0,
1226
- "n_test_samples": 0
1227
- }
1228
- }
1229
- },
1230
- {
1231
- "Indonesia": {
1232
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
1233
- "f1_score": 0.0,
1234
- "precision": 0.0,
1235
- "recall": 0.0,
1236
- "specificity": 1.0,
1237
- "false_positive_rate": 0.0,
1238
- "false_negative_rate": 0.0,
1239
- "negative_predictive_value": 1.0,
1240
- "n_test_samples": 0
1241
- },
1242
- "cis-lmu/glotlid/model.bin": {
1243
- "f1_score": 0.0,
1244
- "precision": 0.0,
1245
- "recall": 0.0,
1246
- "specificity": 1.0,
1247
- "false_positive_rate": 0.0,
1248
- "false_negative_rate": 0.0,
1249
- "negative_predictive_value": 1.0,
1250
- "n_test_samples": 0
1251
- },
1252
- "laurievb/OpenLID/model.bin": {
1253
- "f1_score": 0.0,
1254
- "precision": 0.0,
1255
- "recall": 0.0,
1256
- "specificity": 0.9999,
1257
- "false_positive_rate": 0.0001,
1258
- "false_negative_rate": 0.0,
1259
- "negative_predictive_value": 1.0,
1260
- "n_test_samples": 0
1261
- }
1262
- }
1263
- },
1264
- {
1265
- "Guinea": {
1266
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
1267
- "f1_score": 0.0,
1268
- "precision": 0.0,
1269
- "recall": 0.0,
1270
- "specificity": 1.0,
1271
- "false_positive_rate": 0.0,
1272
- "false_negative_rate": 0.0,
1273
- "negative_predictive_value": 1.0,
1274
- "n_test_samples": 0
1275
- }
1276
- }
1277
- },
1278
- {
1279
- "Chad": {
1280
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
1281
- "f1_score": 0.0,
1282
- "precision": 0.0,
1283
- "recall": 0.0,
1284
- "specificity": 1.0,
1285
- "false_positive_rate": 0.0,
1286
- "false_negative_rate": 0.0,
1287
- "negative_predictive_value": 1.0,
1288
- "n_test_samples": 0
1289
- }
1290
- }
1291
- },
1292
- {
1293
- "Azerbaijan": {
1294
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
1295
- "f1_score": 0.0,
1296
- "precision": 0.0,
1297
- "recall": 0.0,
1298
- "specificity": 0.9997,
1299
- "false_positive_rate": 0.0003,
1300
- "false_negative_rate": 0.0,
1301
- "negative_predictive_value": 1.0,
1302
- "n_test_samples": 0
1303
- },
1304
- "cis-lmu/glotlid/model.bin": {
1305
- "f1_score": 0.0,
1306
- "precision": 0.0,
1307
- "recall": 0.0,
1308
- "specificity": 0.9999,
1309
- "false_positive_rate": 0.0001,
1310
- "false_negative_rate": 0.0,
1311
- "negative_predictive_value": 1.0,
1312
- "n_test_samples": 0
1313
- },
1314
- "laurievb/OpenLID/model.bin": {
1315
- "f1_score": 0.0,
1316
- "precision": 0.0,
1317
- "recall": 0.0,
1318
- "specificity": 1.0,
1319
- "false_positive_rate": 0.0,
1320
- "false_negative_rate": 0.0,
1321
- "negative_predictive_value": 1.0,
1322
- "n_test_samples": 0
1323
- }
1324
- }
1325
- },
1326
- {
1327
- "Malaysia": {
1328
- "atlasia/Sfaya-Moroccan-Darija-vs-All/model_multi_v3_2fpr.bin": {
1329
- "f1_score": 0.0,
1330
- "precision": 0.0,
1331
- "recall": 0.0,
1332
- "specificity": 1.0,
1333
- "false_positive_rate": 0.0,
1334
- "false_negative_rate": 0.0,
1335
- "negative_predictive_value": 1.0,
1336
- "n_test_samples": 0
1337
- }
1338
- }
1339
- },
1340
- {
1341
- "Uighur (China)": {
1342
- "cis-lmu/glotlid/model.bin": {
1343
- "f1_score": 0.0,
1344
- "precision": 0.0,
1345
- "recall": 0.0,
1346
- "specificity": 1.0,
1347
- "false_positive_rate": 0.0,
1348
- "false_negative_rate": 0.0,
1349
- "negative_predictive_value": 1.0,
1350
- "n_test_samples": 0
1351
- },
1352
- "laurievb/OpenLID/model.bin": {
1353
- "f1_score": 0.0,
1354
- "precision": 0.0,
1355
- "recall": 0.0,
1356
- "specificity": 1.0,
1357
- "false_positive_rate": 0.0,
1358
- "false_negative_rate": 0.0,
1359
- "negative_predictive_value": 1.0,
1360
- "n_test_samples": 0
1361
- }
1362
- }
1363
- },
1364
- {
1365
- "Balochistan": {
1366
- "cis-lmu/glotlid/model.bin": {
1367
- "f1_score": 0.0,
1368
- "precision": 0.0,
1369
- "recall": 0.0,
1370
- "specificity": 1.0,
1371
- "false_positive_rate": 0.0,
1372
- "false_negative_rate": 0.0,
1373
- "negative_predictive_value": 1.0,
1374
- "n_test_samples": 0
1375
- }
1376
- }
1377
- }
1378
  ]
 
1
  [
2
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  ]
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
- fasttext
2
- transformers
3
- datasets
4
- pandas
5
- scikit-learn
 
 
1
+ fasttext==0.9.3
2
+ transformers==4.39.2
3
+ datasets==2.18.0
4
+ pandas==2.2.3
5
+ scikit-learn==1.4.2
6
+ numpy==1.26.4
utils.py CHANGED
@@ -1,24 +1,38 @@
1
  import base64
2
- from fasttext import load_model
3
  from huggingface_hub import hf_hub_download
 
4
  import os
5
  import json
6
  import pandas as pd
7
- from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, balanced_accuracy_score, matthews_corrcoef
 
 
 
 
 
 
 
8
  import numpy as np
9
  from datasets import load_dataset
10
- import fasttext
11
 
12
  # Constants
13
  MODEL_REPO = "atlasia/Sfaya-Moroccan-Darija-vs-All"
14
  BIN_FILENAME = "model_multi_v3_2fpr.bin"
15
  BINARY_LEADERBOARD_FILE = "darija_leaderboard_binary.json"
16
  MULTILINGUAL_LEADERBOARD_FILE = "darija_leaderboard_multilingual.json"
17
- DATA_PATH = "atlasia/No-Arabic-Dialect-Left-Behind-Filtered-Balanced"
18
 
19
  target_label = "Morocco"
20
  is_binary = False
21
 
 
 
 
 
 
 
 
 
22
  metrics = [
23
  'f1_score',
24
  'precision',
@@ -38,6 +52,16 @@ default_metrics = [
38
  'false_negative_rate'
39
  ]
40
 
 
 
 
 
 
 
 
 
 
 
41
  language_mapping_dict = {
42
  'ace_Arab': 'Acehnese',
43
  'acm_Arab': 'Mesopotamia', # 'Gilit Mesopotamian'
@@ -228,53 +252,87 @@ def run_eval_one_vs_all(model, data_test, TARGET_LANG='Morocco', language_mappin
228
  out = out.reset_index()
229
  out = out[out['preds']==TARGET_LANG].drop(columns=['preds', 'size'])
230
 
 
 
231
  return out
232
 
233
- def update_darija_binary_leaderboard(result_df, model_name, BINARY_LEADERBOARD_FILE="darija_leaderboard_binary.json"):
234
  try:
235
  with open(BINARY_LEADERBOARD_FILE, "r") as f:
236
  data = json.load(f)
237
  except FileNotFoundError:
238
  data = []
239
-
240
  # Process the results for each dialect/country
241
  for _, row in result_df.iterrows():
242
- country = row['dialect']
243
- # skip 'Other' class, it is considered as the null space
244
- if country == 'Other':
245
  continue
246
-
247
- # Find existing country entry or create new one
248
- country_entry = next((item for item in data if country in item), None)
249
- if country_entry is None:
250
- country_entry = {country: {}}
251
- data.append(country_entry)
252
 
253
- # Update the model metrics directly under the model name
254
- if country not in country_entry:
255
- country_entry[country] = {}
256
- country_entry[country][model_name] = float(row['false_positive_rate'])
 
 
 
 
 
 
 
 
 
 
 
257
 
258
- if country_entry[country].get("n_test_samples") is None:
259
- country_entry[country]["n_test_samples"] = int(row['size'])
 
260
 
261
  # Save updated leaderboard data
262
- with open(MULTILINGUAL_LEADERBOARD_FILE, "w") as f:
263
  json.dump(data, f, indent=4)
264
 
265
  def handle_evaluation(model_path, model_path_bin, use_mapping=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  # run the evaluation
267
- result_df, _ = run_eval(model_path, model_path_bin, language_mapping_dict, use_mapping=use_mapping)
268
  # set the model name
269
  model_name = model_path + '/' + model_path_bin
270
- # update the leaderboard
 
271
  update_darija_multilingual_leaderboard(result_df, model_name, MULTILINGUAL_LEADERBOARD_FILE)
272
- # update the leaderboard table
273
- df = load_leaderboard_multilingual()
274
 
275
- return create_leaderboard_display_multilingual(df, 'Morocco', default_metrics)
 
 
 
 
 
 
 
 
 
 
 
276
 
277
- def run_eval(model_path, model_path_bin, language_mapping_dict=None, use_mapping=False):
278
  """Run evaluation on a dataset and compute metrics.
279
 
280
  Args:
@@ -288,21 +346,6 @@ def run_eval(model_path, model_path_bin, language_mapping_dict=None, use_mapping
288
  pd.DataFrame: A DataFrame containing evaluation metrics.
289
  """
290
 
291
- # download model and get the model path
292
- model_path = hf_hub_download(repo_id=model_path, filename=model_path_bin, cache_dir=None)
293
-
294
- # Load the trained model
295
- print(f"[INFO] Loading model from Path: {model_path}, using version {model_path_bin}...")
296
- model = fasttext.load_model(model_path)
297
-
298
- # Load the evaluation dataset
299
- print(f"[INFO] Loading evaluation dataset from Path: atlasia/No-Arabic-Dialect-Left-Behind-Filtered-Balanced...")
300
- eval_dataset = load_dataset("atlasia/No-Arabic-Dialect-Left-Behind-Filtered-Balanced", split='test')
301
-
302
- # Transform to pandas DataFrame
303
- print(f"[INFO] Converting evaluation dataset to Pandas DataFrame...")
304
- df_eval = pd.DataFrame(eval_dataset)
305
-
306
  # Predict labels using the model
307
  print(f"[INFO] Running predictions...")
308
  df_eval['preds'] = df_eval['text'].apply(lambda text: predict_label(text, model, language_mapping_dict, use_mapping=use_mapping))
@@ -318,7 +361,7 @@ def run_eval(model_path, model_path_bin, language_mapping_dict=None, use_mapping
318
 
319
  return result_df, df_eval
320
 
321
- def process_results_file(file, uploaded_model_name, base_path_save="./atlasia/submissions/"):
322
  try:
323
  if file is None:
324
  return "Please upload a file."
@@ -352,13 +395,14 @@ def process_results_file(file, uploaded_model_name, base_path_save="./atlasia/su
352
  # Update the leaderboards
353
  update_darija_multilingual_leaderboard(result_df, uploaded_model_name, MULTILINGUAL_LEADERBOARD_FILE)
354
 
355
- # result_df_binary = run_eval_one_vs_all(model, data_test, TARGET_LANG='Morocco', language_mapping_dict=None, use_mapping=False)
356
- # update_darija_binary_leaderboard(result_df, uploaded_model_name, BINARY_LEADERBOARD_FILE)
 
357
 
358
  # update the leaderboard table
359
  df = load_leaderboard_multilingual()
360
 
361
- return create_leaderboard_display_multilingual(df, 'Morocco', default_metrics)
362
 
363
  def update_darija_multilingual_leaderboard(result_df, model_name, MULTILINGUAL_LEADERBOARD_FILE="darija_leaderboard_multilingual.json"):
364
 
@@ -407,6 +451,42 @@ def update_darija_multilingual_leaderboard(result_df, model_name, MULTILINGUAL_L
407
  json.dump(data, f, indent=4)
408
 
409
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
  def load_leaderboard_multilingual(MULTILINGUAL_LEADERBOARD_FILE="darija_leaderboard_multilingual.json"):
411
  current_dir = os.path.dirname(os.path.abspath(__file__))
412
  MULTILINGUAL_LEADERBOARD_FILE = os.path.join(current_dir, MULTILINGUAL_LEADERBOARD_FILE)
@@ -433,10 +513,36 @@ def load_leaderboard_multilingual(MULTILINGUAL_LEADERBOARD_FILE="darija_leaderbo
433
  df = pd.DataFrame(rows)
434
  return df
435
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436
  def create_leaderboard_display_multilingual(df, selected_country, selected_metrics):
437
  # Filter by country if specified
438
  if selected_country and selected_country.upper() != 'ALL':
439
- print(f"Filtering leaderboard by country: {selected_country}")
440
  df = df[df['country'] == selected_country]
441
  df = df.drop(columns=['country'])
442
 
@@ -466,6 +572,15 @@ def update_leaderboard_multilingual(country, selected_metrics):
466
  display_df = create_leaderboard_display_multilingual(df, country, selected_metrics)
467
  return display_df
468
 
 
 
 
 
 
 
 
 
 
469
  def encode_image_to_base64(image_path):
470
  with open(image_path, "rb") as image_file:
471
  encoded_string = base64.b64encode(image_file.read()).decode()
@@ -485,4 +600,37 @@ def create_html_image(image_path):
485
  </div>
486
  </div>
487
  """
488
- return html_string
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import base64
 
2
  from huggingface_hub import hf_hub_download
3
+ import fasttext
4
  import os
5
  import json
6
  import pandas as pd
7
+ from sklearn.metrics import (
8
+ precision_score,
9
+ recall_score,
10
+ f1_score,
11
+ confusion_matrix,
12
+ balanced_accuracy_score,
13
+ matthews_corrcoef
14
+ )
15
  import numpy as np
16
  from datasets import load_dataset
 
17
 
18
  # Constants
19
  MODEL_REPO = "atlasia/Sfaya-Moroccan-Darija-vs-All"
20
  BIN_FILENAME = "model_multi_v3_2fpr.bin"
21
  BINARY_LEADERBOARD_FILE = "darija_leaderboard_binary.json"
22
  MULTILINGUAL_LEADERBOARD_FILE = "darija_leaderboard_multilingual.json"
23
+ DATA_PATH = "atlasia/Arabic-LID-Leaderboard"
24
 
25
  target_label = "Morocco"
26
  is_binary = False
27
 
28
+ # Load test dataset
29
+ test_dataset = load_dataset(DATA_PATH, split='test')
30
+
31
+ # Supported dialects
32
+ all_target_languages = list(test_dataset.unique("dialect"))
33
+ supported_dialects = all_target_languages + ['All']
34
+ languages_to_display_one_vs_all = all_target_languages # everything except All
35
+
36
  metrics = [
37
  'f1_score',
38
  'precision',
 
52
  'false_negative_rate'
53
  ]
54
 
55
+ # default language to display in one-vs-all leaderboard
56
+ default_languages = [
57
+ 'Morocco',
58
+ 'MSA',
59
+ 'Egypt',
60
+ 'Algeria',
61
+ 'Tunisia',
62
+ 'Levantine',
63
+ ]
64
+
65
  language_mapping_dict = {
66
  'ace_Arab': 'Acehnese',
67
  'acm_Arab': 'Mesopotamia', # 'Gilit Mesopotamian'
 
252
  out = out.reset_index()
253
  out = out[out['preds']==TARGET_LANG].drop(columns=['preds', 'size'])
254
 
255
+ print(f'out for TARGET_LANG={TARGET_LANG} \n: {out}')
256
+
257
  return out
258
 
259
def update_darija_one_vs_all_leaderboard(result_df, model_name, target_lang, BINARY_LEADERBOARD_FILE="darija_leaderboard_binary.json"):
    """Store one model's one-vs-all false-positive rates for a target language.

    The leaderboard file holds a JSON list of single-key dicts shaped like
    ``{target_lang: {dialect: {model_name: false_positive_rate}}}``. The entry
    for ``target_lang`` is created on first use, and an existing value for the
    same ``(dialect, model_name)`` pair is overwritten.

    Args:
        result_df: DataFrame with at least the columns ``dialect`` and
            ``false_positive_rate``; one row per dialect.
        model_name: Key under which the model's rates are stored.
        target_lang: Target language the rates were computed against.
        BINARY_LEADERBOARD_FILE: Path of the JSON leaderboard file. A missing
            file is treated as an empty leaderboard.
    """
    # NOTE(review): the path is used exactly as given (i.e. relative to the
    # current working directory), whereas load_leaderboard_one_vs_all resolves
    # it against this module's directory -- confirm both point at the same file.
    try:
        with open(BINARY_LEADERBOARD_FILE, "r") as f:
            data = json.load(f)
    except FileNotFoundError:
        data = []

    # The target-language entry does not depend on the row, so look it up once
    # instead of re-scanning the whole leaderboard for every dialect.
    target_entry = next((item for item in data if target_lang in item), None)

    for _, row in result_df.iterrows():
        dialect = row['dialect']
        # Skip 'Other' class, it is considered as the null space
        if dialect == 'Other':
            continue

        # Create the entry lazily so that a result set without any usable row
        # does not add an empty target-language entry to the file.
        if target_entry is None:
            target_entry = {target_lang: {}}
            data.append(target_entry)

        dialect_scores = target_entry[target_lang].setdefault(dialect, {})
        dialect_scores[model_name] = float(row['false_positive_rate'])

    # Save updated leaderboard data
    with open(BINARY_LEADERBOARD_FILE, "w") as f:
        json.dump(data, f, indent=4)
296
 
297
def handle_evaluation(model_path, model_path_bin, use_mapping=False):
    """Evaluate a fastText model from the Hugging Face Hub and update both leaderboards.

    Args:
        model_path: Hub repository id that hosts the model.
        model_path_bin: Name of the model file inside that repository.
        use_mapping: Forwarded to ``run_eval`` and ``run_eval_one_vs_all``
            together with ``language_mapping_dict``.

    Returns:
        tuple: ``(display, status_message)`` where ``display`` is the result of
        ``create_leaderboard_display_multilingual`` for ``target_label`` and
        ``default_metrics``, and ``status_message`` is a Markdown string.
    """
    # Download the model. The local file path is kept in its own variable so
    # the repository id in `model_path` stays available for the leaderboard
    # name below (previously the id was overwritten by the cache path).
    local_model_file = hf_hub_download(repo_id=model_path, filename=model_path_bin, cache_dir=None)

    # Load the trained model
    print(f"[INFO] Loading model from Path: {local_model_file}, using version {model_path_bin}...")
    model = fasttext.load_model(local_model_file)

    # Load the evaluation dataset
    print(f"[INFO] Loading evaluation dataset from Path: {DATA_PATH}...")
    eval_dataset = load_dataset(DATA_PATH, split='test')

    # Transform to pandas DataFrame
    print("[INFO] Converting evaluation dataset to Pandas DataFrame...")
    df_eval = pd.DataFrame(eval_dataset)

    # Run the multilingual evaluation
    result_df, _ = run_eval(model, df_eval, language_mapping_dict, use_mapping=use_mapping)

    # Leaderboard key: "<repo id>/<model file name>"
    model_name = model_path + '/' + model_path_bin

    # Update the multilingual leaderboard
    update_darija_multilingual_leaderboard(result_df, model_name, MULTILINGUAL_LEADERBOARD_FILE)

    # Update the one-vs-all leaderboard, once per target language
    for target_lang in all_target_languages:
        result_df_one_vs_all = run_eval_one_vs_all(
            model,
            df_eval,
            TARGET_LANG=target_lang,
            language_mapping_dict=language_mapping_dict,
            use_mapping=use_mapping,
        )
        update_darija_one_vs_all_leaderboard(result_df_one_vs_all, model_name, target_lang, BINARY_LEADERBOARD_FILE)

    # Only the multilingual table is returned, so only that one is reloaded.
    df_multilingual = load_leaderboard_multilingual()

    status_message = "**Evaluation now ended! 🤗**"

    return create_leaderboard_display_multilingual(df_multilingual, target_label, default_metrics), status_message
334
 
335
+ def run_eval(model, df_eval, language_mapping_dict=None, use_mapping=False):
336
  """Run evaluation on a dataset and compute metrics.
337
 
338
  Args:
 
346
  pd.DataFrame: A DataFrame containing evaluation metrics.
347
  """
348
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  # Predict labels using the model
350
  print(f"[INFO] Running predictions...")
351
  df_eval['preds'] = df_eval['text'].apply(lambda text: predict_label(text, model, language_mapping_dict, use_mapping=use_mapping))
 
361
 
362
  return result_df, df_eval
363
 
364
+ def process_results_file(file, uploaded_model_name, base_path_save="./atlasia/submissions/", default_language='Morocco'):
365
  try:
366
  if file is None:
367
  return "Please upload a file."
 
395
  # Update the leaderboards
396
  update_darija_multilingual_leaderboard(result_df, uploaded_model_name, MULTILINGUAL_LEADERBOARD_FILE)
397
 
398
+ # TODO: implement this one_vs_all differently for people only submitting a CSV file. They need to submit two files, one for multi-lang and the other for one-vs-all
399
+ # result_df_one_vs_all = run_eval_one_vs_all(...)
400
+ # update_darija_one_vs_all_leaderboard(...)
401
 
402
  # update the leaderboard table
403
  df = load_leaderboard_multilingual()
404
 
405
+ return create_leaderboard_display_multilingual(df, default_language, default_metrics)
406
 
407
  def update_darija_multilingual_leaderboard(result_df, model_name, MULTILINGUAL_LEADERBOARD_FILE="darija_leaderboard_multilingual.json"):
408
 
 
451
  json.dump(data, f, indent=4)
452
 
453
 
454
def load_leaderboard_one_vs_all(BINARY_LEADERBOARD_FILE="darija_leaderboard_binary.json"):
    """Load the one-vs-all leaderboard as a wide DataFrame.

    The JSON file is expected to hold a list of dicts shaped like
    ``{target_language: {language: {model_name: false_positive_rate}}}``.

    Args:
        BINARY_LEADERBOARD_FILE: Leaderboard file name, resolved against the
            directory of this module (an absolute path is used unchanged).

    Returns:
        pd.DataFrame: One row per ``(model, target_language)`` pair with the
        columns ``model`` and ``target_language`` followed by one column per
        language holding the false-positive rate. When the file is missing or
        contains no scores, an empty frame with only ``model`` and
        ``target_language`` columns is returned.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    leaderboard_path = os.path.join(current_dir, BINARY_LEADERBOARD_FILE)

    # A leaderboard that has not been written yet is treated as empty, which
    # mirrors how the one-vs-all update function handles a missing file.
    try:
        with open(leaderboard_path, "r") as f:
            data = json.load(f)
    except FileNotFoundError:
        data = []

    # Flatten the nested structure into one record per score
    rows = []
    for leaderboard_data in data:
        for target_language, results in leaderboard_data.items():
            for language, models in results.items():
                for model_name, false_positive_rate in models.items():
                    rows.append({
                        'target_language': target_language,
                        'language': language,
                        'model': model_name,
                        'false_positive_rate': false_positive_rate,
                    })

    # Pivoting a frame without columns raises KeyError, so short-circuit here.
    if not rows:
        return pd.DataFrame(columns=['model', 'target_language'])

    df = pd.DataFrame(rows)

    # Languages become columns, (model, target_language) pairs become rows and
    # each cell holds the corresponding false_positive_rate.
    df_pivot = df.pivot(index=['model', 'target_language'], columns='language', values='false_positive_rate').reset_index()

    return df_pivot
489
+
490
  def load_leaderboard_multilingual(MULTILINGUAL_LEADERBOARD_FILE="darija_leaderboard_multilingual.json"):
491
  current_dir = os.path.dirname(os.path.abspath(__file__))
492
  MULTILINGUAL_LEADERBOARD_FILE = os.path.join(current_dir, MULTILINGUAL_LEADERBOARD_FILE)
 
513
  df = pd.DataFrame(rows)
514
  return df
515
 
516
def create_leaderboard_display_one_vs_all(df, target_language, selected_languages):
    """Build the one-vs-all table shown for a single target language.

    Args:
        df: Wide leaderboard frame with the columns ``model`` and
            ``target_language`` plus one column per language.
        target_language: Target language to keep; a falsy value disables the
            row filter.
        selected_languages: Languages requested for display. The target
            language itself is dropped from this list.

    Returns:
        tuple: ``(display_df, selected_languages)`` where ``display_df`` holds
        ``model`` plus the requested languages that exist in ``df`` (floats
        rounded to 4 decimals), and ``selected_languages`` is the requested
        list without the target language.
    """
    # Filter by target_language if specified
    if target_language:
        df = df[df['target_language'] == target_language]

    # The target language is never shown as a column of its own table
    selected_languages = [lang for lang in (selected_languages or []) if lang != target_language]

    # Only languages that actually have a column can be shown or sorted on;
    # a requested language without any score must not raise.
    available_languages = [lang for lang in selected_languages if lang in df.columns]

    # Sort by the first displayed language.
    # NOTE(review): descending order puts the highest false-positive rate on
    # top -- confirm this is the intended ranking for an error rate.
    if available_languages:
        df = df.sort_values(by=available_languages[0], ascending=False)

    # Copy so the rounding below writes to an independent frame, not a slice
    df = df[['model'] + available_languages].copy()

    # Format numeric columns to 4 decimal places
    numeric_cols = df.select_dtypes(include=['float64']).columns
    df[numeric_cols] = df[numeric_cols].round(4)

    return df, selected_languages
540
+
541
+
542
  def create_leaderboard_display_multilingual(df, selected_country, selected_metrics):
543
  # Filter by country if specified
544
  if selected_country and selected_country.upper() != 'ALL':
545
+ # print(f"Filtering leaderboard by country: {selected_country}")
546
  df = df[df['country'] == selected_country]
547
  df = df.drop(columns=['country'])
548
 
 
572
  display_df = create_leaderboard_display_multilingual(df, country, selected_metrics)
573
  return display_df
574
 
575
def update_leaderboard_one_vs_all(target_language, selected_languages):
    """Reload the one-vs-all leaderboard and build its display table.

    Args:
        target_language: Target language whose table is requested.
        selected_languages: Languages to show; when empty or ``None`` the
            module-level ``default_languages`` are used instead.

    Returns:
        tuple: ``(display_df, selected_languages)`` exactly as produced by
        ``create_leaderboard_display_one_vs_all``.
    """
    # Fall back to the default languages when nothing was selected
    languages = selected_languages if selected_languages else default_languages
    leaderboard = load_leaderboard_one_vs_all()
    display_df, shown_languages = create_leaderboard_display_one_vs_all(leaderboard, target_language, languages)
    return display_df, shown_languages
583
+
584
  def encode_image_to_base64(image_path):
585
  with open(image_path, "rb") as image_file:
586
  encoded_string = base64.b64encode(image_file.read()).decode()
 
600
  </div>
601
  </div>
602
  """
603
+ return html_string
604
+
605
+ # Function to render HTML table with fixed 'model' column
606
def render_fixed_columns(df):
    """Render ``df`` as an HTML table whose first column stays visible while scrolling.

    The ``model`` header cell and the first cell of every body row receive the
    ``fixed`` CSS class, which the embedded stylesheet makes sticky on the left.

    Args:
        df: DataFrame to render; the ``model`` column is expected to come first.

    Returns:
        str: A ``<style>`` block followed by the table wrapped in a scrollable
        ``div``.
    """
    style = """
    <style>
        .table-container {
            overflow-x: auto;
            position: relative;
            white-space: nowrap;
        }
        table {
            border-collapse: collapse;
            width: 100%;
        }
        th, td {
            border: 1px solid black;
            padding: 8px;
            text-align: left;
        }
        th.fixed, td.fixed {
            position: sticky;
            left: 0;
            background-color: white;
            z-index: 2;
        }
    </style>
    """
    table_html = df.to_html(index=False).replace(
        "<th>model</th>", '<th class="fixed">model</th>'
    )
    # Body rows open with a bare "<tr>" (the header row carries a style
    # attribute), so the first "<td>" after each split point is that row's
    # first cell. Tagging it per row keeps the column sticky on every row
    # instead of only on the first one.
    table_html = "<tr>".join(
        chunk.replace('<td>', '<td class="fixed">', 1)
        for chunk in table_html.split("<tr>")
    )
    return f"{style}<div class='table-container'>{table_html}</div>"