Spaces:

atlasia
/

Open-Arabic-Dialect-Identification-Leaderboard

Running

App Files Community

BounharAbdelaziz committed on Jan 8

Commit

d564ed1

1 Parent(s): cf9ff03

save via commit

Browse files

Files changed (1) hide show

utils.py +116 -117

utils.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import base64
 from huggingface_hub import hf_hub_download
 import fasttext
@@ -13,9 +14,8 @@ from sklearn.metrics import (
     matthews_corrcoef
 )
 import numpy as np
 from constants import *
-from huggingface_hub import HfApi, login
-from pathlib import Path
 def predict_label(text, model, language_mapping_dict, use_mapping=False):
     """
@@ -183,7 +183,51 @@ def run_eval_one_vs_all(data_test, TARGET_LANG='Morocco'):
     return out
 def handle_evaluation(model_path, model_path_bin, use_mapping=False):
     # download model and get the model path
@@ -300,6 +344,60 @@ def process_results_file(file, uploaded_model_name, base_path_save="./atlasia/su
     return create_leaderboard_display_multilingual(df_multilingual, target_label, default_metrics), status_message
 def load_leaderboard_one_vs_all(DIALECT_CONFUSION_LEADERBOARD_FILE):
     current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -457,122 +555,23 @@ def render_fixed_columns(df):
     """ A function to render HTML table with fixed 'model' column for better visibility """
     return NotImplementedError
-def update_repo_file(api, repo_id, filename, data):
-    # Use the app directory
-    app_dir = Path("/home/user/app")
-    temp_file = app_dir / filename
-    # Write the updated data to file
-    with open(temp_file, "w") as f:
-        json.dump(data, f, indent=4)
-    try:
-        # Try to create the repo if it doesn't exist
-        api.create_repo(repo_id, exist_ok=True)
-        # Upload the file back to the repository
-        api.upload_file(
-            path_or_fileobj=str(temp_file),
-            path_in_repo=filename,
-            repo_id=repo_id,
-            repo_type="model",  # Changed back to "model" since it's a regular repo
-            commit_message=f"Update {filename}"
-        )
-    except Exception as e:
-        print(f"Error during repository operation: {str(e)}")
-        raise
-def update_darija_one_vs_all_leaderboard(result_df, model_name, target_lang, DIALECT_CONFUSION_LEADERBOARD_FILE="darija_leaderboard_dialect_confusion.json"):
-    # Initialize Hugging Face API
-    api = HfApi()
-    try:
-        # Download existing file
-        try:
-            file_content = api.fetch_file_content(
-                repo_id=LEADERBOARD_PATH,
-                filename=DIALECT_CONFUSION_LEADERBOARD_FILE,
-                repo_type="model"
-            )
-            data = json.loads(file_content)
-        except:
-            data = []
-        # Process the results
-        for _, row in result_df.iterrows():
-            dialect = row['dialect']
-            if dialect == 'Other':
-                continue
-            target_entry = next((item for item in data if target_lang in item), None)
-            if target_entry is None:
-                target_entry = {target_lang: {}}
-                data.append(target_entry)
-            country_data = target_entry[target_lang]
-            if dialect not in country_data:
-                country_data[dialect] = {}
-            country_data[dialect][model_name] = float(row['false_positive_rate'])
-        # Update the file in the repository
-        update_repo_file(api, LEADERBOARD_PATH, DIALECT_CONFUSION_LEADERBOARD_FILE, data)
-    except Exception as e:
-        print(f"Error updating repository: {str(e)}")
-        raise
-def update_darija_multilingual_leaderboard(result_df, model_name, MULTI_DIALECTS_LEADERBOARD_FILE="darija_leaderboard_multi_dialects.json"):
-    # Initialize Hugging Face API
-    api = HfApi()
     try:
-        # Download existing file
-        try:
-            file_content = api.fetch_file_content(
-                repo_id=LEADERBOARD_PATH,
-                filename=MULTI_DIALECTS_LEADERBOARD_FILE,
-                repo_type="model"
-            )
-            data = json.loads(file_content)
-        except:
-            data = []
-        # Process the results
-        for _, row in result_df.iterrows():
-            country = row['country']
-            if country == 'Other':
-                continue
-            metrics = {
-                'f1_score': float(row['f1_score']),
-                'precision': float(row['precision']),
-                'recall': float(row['recall']),
-                'macro_f1_score': float(row['macro_f1_score']),
-                'micro_f1_score': float(row['micro_f1_score']),
-                'weighted_f1_score': float(row['weighted_f1_score']),
-                'specificity': float(row['specificity']),
-                'false_positive_rate': float(row['false_positive_rate']),
-                'false_negative_rate': float(row['false_negative_rate']),
-                'negative_predictive_value': float(row['negative_predictive_value']),
-                'balanced_accuracy': float(row['balanced_accuracy']),
-                'matthews_correlation': float(row['matthews_correlation']),
-                'n_test_samples': int(row['samples'])
-            }
-            country_entry = next((item for item in data if country in item), None)
-            if country_entry is None:
-                country_entry = {country: {}}
-                data.append(country_entry)
-            if country not in country_entry:
-                country_entry[country] = {}
-            country_entry[country][model_name] = metrics
-        # Update the file in the repository
-        update_repo_file(api, LEADERBOARD_PATH, MULTI_DIALECTS_LEADERBOARD_FILE, data)
-    except Exception as e:
-        print(f"Error updating repository: {str(e)}")
-        raise

+import subprocess
 import base64
 from huggingface_hub import hf_hub_download
 import fasttext
     matthews_corrcoef
 )
 import numpy as np
 from constants import *
 def predict_label(text, model, language_mapping_dict, use_mapping=False):
     """
     return out
+def update_darija_one_vs_all_leaderboard(result_df, model_name, target_lang, DIALECT_CONFUSION_LEADERBOARD_FILE="darija_leaderboard_binary.json"):
+    # use base path to ensure correct saving
+    base_path = os.path.dirname(__file__)
+    json_file_path = os.path.join(base_path, DIALECT_CONFUSION_LEADERBOARD_FILE)
+    print(f"[INFO] Loading leaderboard data (json file) from: {json_file_path}")
+    # Load leaderboard data
+    try:
+        with open(json_file_path, "r") as f:
+            data = json.load(f)
+    except FileNotFoundError:
+        data = []
+    # Process the results for each dialect/country
+    for _, row in result_df.iterrows():
+        dialect = row['dialect']
+        # Skip 'Other' class, it is considered as the null space
+        if dialect == 'Other':
+            continue
+        # Find existing target_lang entry or create a new one
+        target_entry = next((item for item in data if target_lang in item), None)
+        if target_entry is None:
+            target_entry = {target_lang: {}}
+            data.append(target_entry)
+        # Get the country-specific data for this target language
+        country_data = target_entry[target_lang]
+        # Initialize the dialect/country entry if it doesn't exist
+        if dialect not in country_data:
+            country_data[dialect] = {}
+        # Update the model metrics under the model name for the given dialect
+        country_data[dialect][model_name] = float(row['false_positive_rate'])
+    # Save updated leaderboard data
+    with open(json_file_path, "w") as f:
+        json.dump(data, f, indent=4)
+    save_leaderboard_file(DIALECT_CONFUSION_LEADERBOARD_FILE)
 def handle_evaluation(model_path, model_path_bin, use_mapping=False):
     # download model and get the model path
     return create_leaderboard_display_multilingual(df_multilingual, target_label, default_metrics), status_message
+def update_darija_multilingual_leaderboard(result_df, model_name, MULTI_DIALECTS_LEADERBOARD_FILE):
+    # use base path to ensure correct saving
+    base_path = os.path.dirname(__file__)
+    json_file_path = os.path.join(base_path, MULTI_DIALECTS_LEADERBOARD_FILE)
+    # Load leaderboard data
+    try:
+        with open(json_file_path, "r") as f:
+            data = json.load(f)
+    except FileNotFoundError:
+        data = []
+    # Process the results for each dialect/country
+    for _, row in result_df.iterrows():
+        country = row['country']
+        # skip 'Other' class, it is considered as the null space
+        if country == 'Other':
+            continue
+        # Create metrics dictionary directly
+        metrics = {
+            'f1_score': float(row['f1_score']),
+            'precision': float(row['precision']),
+            'recall': float(row['recall']),
+            'macro_f1_score': float(row['macro_f1_score']),
+            'micro_f1_score': float(row['micro_f1_score']),
+            'weighted_f1_score': float(row['weighted_f1_score']),
+            'specificity': float(row['specificity']),
+            'false_positive_rate': float(row['false_positive_rate']),
+            'false_negative_rate': float(row['false_negative_rate']),
+            'negative_predictive_value': float(row['negative_predictive_value']),
+            'balanced_accuracy': float(row['balanced_accuracy']),
+            'matthews_correlation': float(row['matthews_correlation']),
+            'n_test_samples': int(row['samples'])
+        }
+        # Find existing country entry or create new one
+        country_entry = next((item for item in data if country in item), None)
+        if country_entry is None:
+            country_entry = {country: {}}
+            data.append(country_entry)
+        # Update the model metrics directly under the model name
+        if country not in country_entry:
+            country_entry[country] = {}
+        country_entry[country][model_name] = metrics
+    # Save updated leaderboard data
+    with open(json_file_path, "w") as f:
+        json.dump(data, f, indent=4)
+    save_leaderboard_file(MULTI_DIALECTS_LEADERBOARD_FILE)
 def load_leaderboard_one_vs_all(DIALECT_CONFUSION_LEADERBOARD_FILE):
     current_dir = os.path.dirname(os.path.abspath(__file__))
     """ A function to render HTML table with fixed 'model' column for better visibility """
     return NotImplementedError
+# Function to save and commit leaderboard files
+def save_leaderboard_file(FILE_PATH):
+    # Example data to save (replace with actual leaderboard data)
+    data = {"status": "updated", "data": []}
+    # Save data in json
+    with open(FILE_PATH, "w") as f:
+        json.dump(data, f, indent=4)
+        print(f"[INFO] Saved {FILE_PATH}")
+    # Commit changes to the repository
     try:
+        subprocess.run(["git", "add", FILE_PATH], check=True)
+        subprocess.run(["git", "commit", "-m", "Update leaderboard file"], check=True)
+        subprocess.run(["git", "push"], check=True)
+        print("[INFO] Leaderboard file committed and pushed to the repository.")
+    except subprocess.CalledProcessError as e:
+        print(f"[ERROR] Failed to commit or push changes: {e}")