import os

import pandas as pd
from datasets import Dataset, DatasetDict

from config import LABELS


def load_audio_data(data_dir, test_size=0.2, seed=None):
    """Build a train/test split of labelled ``.wav`` files found under *data_dir*.

    For every ``label_name -> label_id`` entry in ``LABELS``, the directory
    ``data_dir/label_name`` is listed and each entry whose name ends with
    ``".wav"`` becomes one record ``{"path": <file path>, "label": label_id}``.

    Args:
        data_dir: Root directory containing one sub-directory per key of
            ``LABELS``.
        test_size: Forwarded to ``Dataset.train_test_split``; defaults to
            0.2 (80% train / 20% test).
        seed: Forwarded to ``Dataset.train_test_split``. Leave as ``None``
            for a different shuffle on every call, or pass an int to make
            the split reproducible.

    Returns:
        The result of ``Dataset.train_test_split``, exposing ``["train"]``
        and ``["test"]``.

    Raises:
        FileNotFoundError: If a label sub-directory does not exist
            (raised by ``os.listdir``).
        ValueError: If no ``.wav`` file is found under any label directory.
    """
    records = []
    for label_name, label_id in LABELS.items():
        label_dir = os.path.join(data_dir, label_name)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # record order stable so a fixed seed yields the same split everywhere.
        records.extend(
            {"path": os.path.join(label_dir, file_name), "label": label_id}
            for file_name in sorted(os.listdir(label_dir))
            if file_name.endswith(".wav")
        )

    if not records:
        # Fail here with the offending directory in the message rather than
        # letting the split fail on an empty dataset.
        raise ValueError(f"No .wav files found under {data_dir!r}")

    # Convert to a Hugging Face dataset, then split into train / test.
    ds = Dataset.from_list(records)
    return ds.train_test_split(test_size=test_size, seed=seed)


# Disabled alternative loader that builds the dataset from a CSV index.
# NOTE(review): `file` below is undefined; presumably `__file__` was
# intended -- confirm before re-enabling this code.
#
# def load_audio_data_from_csv(csv_path, data_dir):
#     data = []
#     df = pd.read_csv(csv_path, sep=",", header=0)
#     print(df.head())
#     for _, row in df.iterrows():
#         file_path = os.path.join(data_dir, row["dossier"])
#         label = row["emotion"]
#         if os.path.exists(file_path) and label in LABELS:
#             data.append({"path": file_path, "label": LABELS[label]})
#         else:
#             print(f"⚠️ Fichier manquant ou label inconnu : {file_path} - {label}")
#     return Dataset.from_list(data)
#
# # Load the dataset from the CSV
# csv_path = os.path.abspath(os.path.join(os.path.dirname(file), "new_data", "dataset.csv"))
# data_dir = os.path.abspath(os.path.join(os.path.dirname(file), "new_data"))
# ds = load_audio_data_from_csv(csv_path, data_dir)