Spaces:
Sleeping
Sleeping
File size: 1,567 Bytes
06c46fb 1534a11 06c46fb 103eb2f 06c46fb 103eb2f 1534a11 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import os
from datasets import Dataset
from config import LABELS
import pandas as pd
import os
from datasets import Dataset, DatasetDict
import pandas as pd
from config import LABELS
def load_audio_data(data_dir, test_size=0.2, seed=None):
    """Build a train/test Hugging Face dataset from per-label folders of WAV files.

    For every ``(label_name, label_id)`` pair in ``LABELS``, the directory
    ``data_dir/label_name`` is listed and each entry whose name ends with
    ``".wav"`` becomes one row ``{"path": <file path>, "label": label_id}``.

    Args:
        data_dir: Root directory containing one sub-directory per label name.
        test_size: Size of the test split, forwarded to
            ``Dataset.train_test_split``. Defaults to 0.2 (80% train / 20% test).
        seed: Optional seed forwarded to ``Dataset.train_test_split``. Pass a
            fixed value to obtain the same split on every run; ``None`` keeps
            the library's default (unseeded) shuffling.

    Returns:
        The result of ``Dataset.train_test_split``, which exposes the two
        splits as ``ds["train"]`` and ``ds["test"]``.

    Raises:
        FileNotFoundError: If the directory for one of the labels is missing
            (raised by ``os.listdir``).
        ValueError: If no ``.wav`` file was found under any label directory.
    """
    data = []
    for label_name, label_id in LABELS.items():
        label_dir = os.path.join(data_dir, label_name)
        # os.listdir returns entries in arbitrary order; sorting makes the row
        # order (and therefore a seeded split) independent of the filesystem.
        for file_name in sorted(os.listdir(label_dir)):
            if file_name.endswith(".wav"):
                file_path = os.path.join(label_dir, file_name)
                data.append({"path": file_path, "label": label_id})

    # Fail early with an actionable message instead of letting the split
    # complain about an empty dataset.
    if not data:
        raise ValueError(
            f"No .wav files found under {data_dir!r} for labels {list(LABELS)}"
        )

    # Convert the collected rows into a Hugging Face dataset.
    ds = Dataset.from_list(data)

    # Split into train / test subsets.
    return ds.train_test_split(test_size=test_size, seed=seed)
# def load_audio_data_from_csv(csv_path, data_dir):
# data = []
# df = pd.read_csv(csv_path, sep=",", header=0)
# print(df.head())
# for _, row in df.iterrows():
# file_path = os.path.join(data_dir, row["dossier"])
# label = row["emotion"]
# if os.path.exists(file_path) and label in LABELS:
# data.append({"path": file_path, "label": LABELS[label]})
# else:
# print(f"⚠️ Fichier manquant ou label inconnu : {file_path} - {label}")
# return Dataset.from_list(data)
# # Load the dataset from the CSV
# csv_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "new_data", "dataset.csv"))
# data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "new_data"))
# ds = load_audio_data_from_csv(csv_path, data_dir)
|