Marina Kpamegan
modele rapide
103eb2f
raw
history blame
1.57 kB
import os
from datasets import Dataset
from config import LABELS
import pandas as pd
import os
from datasets import Dataset, DatasetDict
import pandas as pd
from config import LABELS
def load_audio_data(data_dir, *, test_size=0.2, seed=None):
    """Collect labelled ``.wav`` files under *data_dir* and split them.

    For every ``label_name -> label_id`` entry of ``LABELS``, the
    sub-directory ``data_dir/label_name`` is listed and each ``.wav`` file
    found there becomes one row ``{"path": <file path>, "label": label_id}``.

    Args:
        data_dir: Root directory containing one sub-directory per key of
            ``LABELS``.
        test_size: Size of the test split, forwarded to
            ``Dataset.train_test_split``. The default of 0.2 gives an
            80% train / 20% test split.
        seed: Optional seed forwarded to ``Dataset.train_test_split``.
            ``None`` (the default) leaves the shuffle unseeded.

    Returns:
        The result of ``Dataset.train_test_split``, which holds the
        ``"train"`` and ``"test"`` splits.

    Raises:
        FileNotFoundError: If a label sub-directory does not exist
            (raised by ``os.listdir``).
        ValueError: If no ``.wav`` file is found under *data_dir*.
    """
    records = []
    for label_name, label_id in LABELS.items():
        label_dir = os.path.join(data_dir, label_name)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # row order stable so that a fixed seed yields the same split.
        for file_name in sorted(os.listdir(label_dir)):
            # Case-insensitive match so ".WAV" files are not silently dropped.
            if file_name.lower().endswith(".wav"):
                records.append(
                    {"path": os.path.join(label_dir, file_name), "label": label_id}
                )

    # Fail early with an explicit message instead of an obscure error from
    # the split step when nothing was collected.
    if not records:
        raise ValueError(f"No .wav files found under {data_dir!r}")

    # Convert to a Hugging Face dataset, then split into train / test.
    dataset = Dataset.from_list(records)
    return dataset.train_test_split(test_size=test_size, seed=seed)
# def load_audio_data_from_csv(csv_path, data_dir):
# data = []
# df = pd.read_csv(csv_path, sep=",", header=0)
# print(df.head())
# for _, row in df.iterrows():
# file_path = os.path.join(data_dir, row["dossier"])
# label = row["emotion"]
# if os.path.exists(file_path) and label in LABELS:
# data.append({"path": file_path, "label": LABELS[label]})
# else:
# print(f"⚠️ Fichier manquant ou label inconnu : {file_path} - {label}")
# return Dataset.from_list(data)
# #Charger le dataset à partir du CSV
# csv_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "new_data", "dataset.csv"))
# data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "new_data"))
# ds = load_audio_data_from_csv(csv_path, data_dir)