jdalfonso's picture
:rocket: feature new interface
201ed31
raw
history blame
1.47 kB
import os
from datasets import Dataset
from config import LABELS
import pandas as pd
def load_audio_data(data_dir):
    """Collect labelled ``.wav`` file paths and split them into train/test.

    Every key of ``LABELS`` is used as the name of a sub-directory of
    ``data_dir``. Each entry of that sub-directory whose name ends with
    ``.wav`` yields one record ``{"path": <joined path>, "label": <id>}``,
    where ``<id>`` is the value ``LABELS`` associates with that key.

    Args:
        data_dir: Directory holding one sub-directory per ``LABELS`` key.

    Returns:
        The object returned by ``Dataset.train_test_split(test_size=0.2)``,
        which exposes a ``"train"`` and a ``"test"`` split.
    """
    records = []
    for class_name, class_id in LABELS.items():
        class_dir = os.path.join(data_dir, class_name)
        records.extend(
            {"path": os.path.join(class_dir, entry), "label": class_id}
            for entry in os.listdir(class_dir)
            if entry.endswith(".wav")
        )

    # Wrap the records in a Hugging Face dataset, then hold out 20% for test.
    return Dataset.from_list(records).train_test_split(test_size=0.2)
# def load_audio_data_from_csv(csv_path, data_dir):
# data = []
# df = pd.read_csv(csv_path, sep=",", header=0)
# print(df.head())
# for _, row in df.iterrows():
# file_path = os.path.join(data_dir, row["dossier"])
# label = row["emotion"]
# if os.path.exists(file_path) and label in LABELS:
# data.append({"path": file_path, "label": LABELS[label]})
# else:
# print(f"⚠️ Fichier manquant ou label inconnu : {file_path} - {label}")
# return Dataset.from_list(data)
# #Charger le dataset à partir du CSV
# csv_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "new_data", "dataset.csv"))
# data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "new_data"))
# ds = load_audio_data_from_csv(csv_path, data_dir)