Spaces:
Sleeping
Sleeping
import os | |
from datasets import Dataset | |
from config import LABELS | |
import pandas as pd | |
def load_audio_data(data_dir): | |
data = [] | |
for label_name, label_id in LABELS.items(): | |
label_dir = os.path.join(data_dir, label_name) | |
for file in os.listdir(label_dir): | |
if file.endswith(".wav"): | |
file_path = os.path.join(label_dir, file) | |
data.append({"path": file_path, "label": label_id}) | |
# Convertir en dataset Hugging Face | |
ds = Dataset.from_list(data) | |
# Séparer en 80% train / 20% test | |
ds = ds.train_test_split(test_size=0.2) | |
return ds # Contient ds["train"] et ds["test"] | |
# def load_audio_data_from_csv(csv_path, data_dir): | |
# data = [] | |
# df = pd.read_csv(csv_path, sep=",", header=0) | |
# print(df.head()) | |
# for _, row in df.iterrows(): | |
# file_path = os.path.join(data_dir, row["dossier"]) | |
# label = row["emotion"] | |
# if os.path.exists(file_path) and label in LABELS: | |
# data.append({"path": file_path, "label": LABELS[label]}) | |
# else: | |
# print(f"⚠️ Fichier manquant ou label inconnu : {file_path} - {label}") | |
# return Dataset.from_list(data) | |
# #Charger le dataset à partir du CSV | |
# csv_path = os.path.abspath(os.path.join(os.path.dirname(file), "new_data", "dataset.csv")) | |
# data_dir = os.path.abspath(os.path.join(os.path.dirname(file), "new_data")) | |
# ds = load_audio_data_from_csv(csv_path, data_dir) | |