Speedup tests
utils.py CHANGED
@@ -33,6 +33,8 @@ all_target_languages = list(test_dataset.unique("dialect"))
 supported_dialects = all_target_languages + ['All']
 languages_to_display_one_vs_all = all_target_languages # everything except All
 
+print(f'all_target_languages: {all_target_languages}')
+
 metrics = [
     'f1_score',
     'precision',
@@ -229,12 +231,8 @@ def make_binary(dialect, target):
         return 'Other'
     return target
 
-def run_eval_one_vs_all(
+def run_eval_one_vs_all(data_test, TARGET_LANG='Morocco'):
 
-    # Predict labels using the model
-    print(f"[INFO] Running predictions...")
-    data_test['preds'] = data_test['text'].apply(lambda text: predict_label(text, model, language_mapping_dict, use_mapping=use_mapping))
-
     # map to binary
     df_test_preds = data_test.copy()
     df_test_preds.loc[df_test_preds['dialect'] == TARGET_LANG, 'dialect'] = TARGET_LANG
@@ -311,17 +309,20 @@ def handle_evaluation(model_path, model_path_bin, use_mapping=False):
     print(f"[INFO] Converting evaluation dataset to Pandas DataFrame...")
     df_eval = pd.DataFrame(eval_dataset)
 
+    # Predict labels using the model
+    print(f"[INFO] Running predictions...")
+    df_eval['preds'] = df_eval['text'].apply(lambda text: predict_label(text, model, language_mapping_dict, use_mapping=use_mapping))
+
     # run the evaluation
-    result_df, _ = run_eval(
+    result_df, _ = run_eval(df_eval)
     # set the model name
     model_name = model_path + '/' + model_path_bin
 
     # update the multilingual leaderboard
     update_darija_multilingual_leaderboard(result_df, model_name, MULTILINGUAL_LEADERBOARD_FILE)
 
-    # # TODO
     for target_lang in all_target_languages:
-        result_df_one_vs_all =run_eval_one_vs_all(
+        result_df_one_vs_all =run_eval_one_vs_all(df_eval, TARGET_LANG=target_lang)
         update_darija_one_vs_all_leaderboard(result_df_one_vs_all, model_name, target_lang, BINARY_LEADERBOARD_FILE)
 
     # load the updated leaderboard tables
@@ -332,7 +333,7 @@ def handle_evaluation(model_path, model_path_bin, use_mapping=False):
 
     return create_leaderboard_display_multilingual(df_multilingual, target_label, default_metrics), status_message
 
-def run_eval(model, df_eval, language_mapping_dict=None, use_mapping=False):
+def run_eval(df_eval):
     """Run evaluation on a dataset and compute metrics.
 
     Args:
@@ -346,20 +347,19 @@ def run_eval(model, df_eval, language_mapping_dict=None, use_mapping=False):
         pd.DataFrame: A DataFrame containing evaluation metrics.
     """
 
-    #
-
-    df_eval['preds'] = df_eval['text'].apply(lambda text: predict_label(text, model, language_mapping_dict, use_mapping=use_mapping))
+    # map to binary
+    df_eval_multilingual = df_eval.copy()
 
     # now drop the columns that are not needed, i.e. 'text'
-
+    df_eval_multilingual = df_eval_multilingual.drop(columns=['text', 'metadata', 'dataset_source'])
 
     # Compute evaluation metrics
     print(f"[INFO] Computing metrics...")
-    result_df, _ = compute_classification_metrics(
+    result_df, _ = compute_classification_metrics(df_eval_multilingual)
 
     # update_darija_multilingual_leaderboard(result_df, model_path, MULTILINGUAL_LEADERBOARD_FILE)
 
-    return result_df,
+    return result_df, df_eval_multilingual
 
 def process_results_file(file, uploaded_model_name, base_path_save="./atlasia/submissions/", default_language='Morocco'):
     try: