Spaces:

atlasia
/

Open-Arabic-Dialect-Identification-Leaderboard

Running

App Files Files Community

BounharAbdelaziz committed on Jan 7

Commit

1c7ff5f

1 Parent(s): 04e4741

moved all constants + added status for uploaded results

Browse files

Files changed (1) hide show

app.py +21 -52

app.py CHANGED Viewed

@@ -6,51 +6,12 @@ from utils import (
     create_html_image,
 )
 import os
-from datasets import load_dataset
 import gradio as gr
 if __name__ == "__main__":
-    # Evaluation dataset path
-    DATA_PATH = "atlasia/Arabic-LID-Leaderboard"
-    # All Metrics
-    metrics = [
-        'f1_score',
-        'precision',
-        'recall',
-        'specificity',
-        'false_positive_rate',
-        'false_negative_rate',
-        'negative_predictive_value',
-        'n_test_samples',
-    ]
-    # Default metrics to display
-    default_metrics = [
-        'f1_score',
-        'precision',
-        'recall',
-        'false_positive_rate',
-        'false_negative_rate'
-    ]
-    # default language to display
-    default_languages = [
-        'Morocco',
-        'MSA',
-        'Egypt',
-        'Algeria',
-        'Tunisia',
-        'Levantine',
-    ]
-    # Load test dataset
-    test_dataset = load_dataset(DATA_PATH, split='test')
-    # Supported dialects
-    all_target_languages = list(test_dataset.unique("dialect"))
-    supported_dialects = all_target_languages + ['All']
-    languages_to_display_one_vs_all = all_target_languages # everything except All
     with gr.Blocks() as app:
         base_path = os.path.dirname(__file__)
         local_image_path = os.path.join(base_path, 'open_arabic_lid_arena.png')
@@ -58,8 +19,8 @@ if __name__ == "__main__":
         gr.HTML(create_html_image(local_image_path))
         gr.Markdown("# 🏅 Open Arabic Dialect Identification Leaderboard")
-        # Multilingual model leaderboard
-        with gr.Tab("Multilingual model leaderboard"):
             gr.Markdown("""
                         Complete leaderboard across multiple arabic dialects.
                         Compare the performance of different models across various metrics such as FNR, FPR, and other clasical metrics.
@@ -97,13 +58,13 @@ if __name__ == "__main__":
                         Let's work together to improve Arabic dialect identification! 🚀
                         """)
-        # Binary model leaderboard
-        with gr.Tab("One-vs-All leaderboard"):
             gr.Markdown("""
-                        A kind of one-vs-all approach for evaluating LID models across multiple arabic dialects.
-                        Computes the `false_positive_rate` of different models for a given target language.
-                        This should help you understand how well a model can identify a specific dialect by
                         showing **how often it misclassifies other dialects as the target dialect**.
                         """
             )
@@ -138,8 +99,8 @@ if __name__ == "__main__":
             model_path = gr.Textbox(label="Model Path", placeholder='path/to/model')
             model_path_bin = gr.Textbox(label=".bin filename", placeholder='model.bin')
             gr.Markdown("### **⚠️ To ensure correct results, tick this when the model's labels are the iso_codes**")
-            use_mapping = gr.Checkbox(label="Does not map to country")
-            eval_button = gr.Button("Evaluate", value=False)  # Initially disabled
             # Status message area
             status_message = gr.Markdown(value="")
@@ -160,7 +121,7 @@ if __name__ == "__main__":
             model = ... # Load your model here
             # Load evaluation benchmark
-            eval_dataset = load_dataset("atlasia/No-Arabic-Dialect-Left-Behind-Filtered-Balanced", split='test').to_pandas() # do not change this line :)
             # Predict labels using your model
             eval_dataset['preds'] = eval_dataset['text'].apply(lambda text: predict_label(text, model)) # predict_label is a function that you need to define for your model
@@ -182,7 +143,15 @@ if __name__ == "__main__":
             uploaded_model_name = gr.Textbox(label="Model name", placeholder='Your model/team name')
             file = gr.File(label="Upload your results")
             upload_button = gr.Button("Upload")
-            upload_button.click(process_results_file, inputs=[file, uploaded_model_name], outputs=[leaderboard_table])
         # Update multilingual table when any input changes
         country_selector.change(

     create_html_image,
 )
 import os
 import gradio as gr
+from constants import *
 if __name__ == "__main__":
     with gr.Blocks() as app:
         base_path = os.path.dirname(__file__)
         local_image_path = os.path.join(base_path, 'open_arabic_lid_arena.png')
         gr.HTML(create_html_image(local_image_path))
         gr.Markdown("# 🏅 Open Arabic Dialect Identification Leaderboard")
+        # Multi-dialects leaderboard
+        with gr.Tab("Multi-dialects model leaderboard"):
             gr.Markdown("""
                         Complete leaderboard across multiple arabic dialects.
                         Compare the performance of different models across various metrics such as FNR, FPR, and other clasical metrics.
                         Let's work together to improve Arabic dialect identification! 🚀
                         """)
+        # Dialect confusion leaderboard
+        with gr.Tab("Dialect confusion leaderboard"): # used to be "One-vs-All leaderboard"
             gr.Markdown("""
+                        Detailed analysis of how well models distinguish specific dialects from others.
+                        For each target dialect, see how often models incorrectly classify other dialects as the target.
+                        Lower `false_positive_rate` indicate better ability to identify the true dialect by
                         showing **how often it misclassifies other dialects as the target dialect**.
                         """
             )
             model_path = gr.Textbox(label="Model Path", placeholder='path/to/model')
             model_path_bin = gr.Textbox(label=".bin filename", placeholder='model.bin')
             gr.Markdown("### **⚠️ To ensure correct results, tick this when the model's labels are the iso_codes**")
+            use_mapping = gr.Checkbox(label="Does not map to country", value=True)      # Initially enabled
+            eval_button = gr.Button("Evaluate", value=False)                            # Initially disabled
             # Status message area
             status_message = gr.Markdown(value="")
             model = ... # Load your model here
             # Load evaluation benchmark
+            eval_dataset = load_dataset("atlasia/Arabic-LID-Leaderboard", split='test').to_pandas() # do not change this line :)
             # Predict labels using your model
             eval_dataset['preds'] = eval_dataset['text'].apply(lambda text: predict_label(text, model)) # predict_label is a function that you need to define for your model
             uploaded_model_name = gr.Textbox(label="Model name", placeholder='Your model/team name')
             file = gr.File(label="Upload your results")
             upload_button = gr.Button("Upload")
+            # Status message area
+            status_message = gr.Markdown(value="")
+            def update_status_message():
+                return "### **⚠️Evaluating... Please wait...**"
+            upload_button.click(update_status_message, outputs=[status_message])
+            upload_button.click(process_results_file, inputs=[file, uploaded_model_name], outputs=[leaderboard_table, status_message])
         # Update multilingual table when any input changes
         country_selector.change(