File size: 1,567 Bytes
06c46fb
 
 
1534a11
06c46fb
103eb2f
 
 
 
 
06c46fb
 
 
 
 
 
 
 
103eb2f
 
 
 
 
 
 
 
 
1534a11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os
from datasets import Dataset
from config import LABELS
import pandas as pd

import os
from datasets import Dataset, DatasetDict
import pandas as pd
from config import LABELS  

def load_audio_data(data_dir, test_size=0.2, seed=None):
    """Build a train/test split of the labelled ``.wav`` files under *data_dir*.

    For every ``label_name -> label_id`` pair in ``LABELS``, the directory
    ``data_dir/label_name`` is listed and each ``.wav`` file found there
    becomes one record ``{"path": <file path>, "label": label_id}``.

    Args:
        data_dir: Root directory containing one sub-directory per key of
            ``LABELS``.
        test_size: Forwarded to ``Dataset.train_test_split``. Defaults to
            0.2 (80% train / 20% test), the value previously hard-coded.
        seed: Forwarded to ``Dataset.train_test_split``. Pass an integer to
            get a reproducible split; ``None`` (default) leaves the shuffle
            unseeded.

    Returns:
        The object returned by ``Dataset.train_test_split``, holding the
        ``"train"`` and ``"test"`` splits.

    Raises:
        OSError: Propagated from ``os.listdir`` when a label directory is
            missing or is not a directory.
        ValueError: If no ``.wav`` file was found under any label directory.
    """
    records = []
    for label_name, label_id in LABELS.items():
        label_dir = os.path.join(data_dir, label_name)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # record order (and therefore a seeded split) reproducible.
        for file_name in sorted(os.listdir(label_dir)):
            # Match the extension case-insensitively so ".WAV" is not skipped.
            if file_name.lower().endswith(".wav"):
                records.append(
                    {"path": os.path.join(label_dir, file_name), "label": label_id}
                )

    # Fail early with an explicit message instead of an opaque error raised
    # from inside the split on an empty dataset.
    if not records:
        raise ValueError(f"No .wav files found under {data_dir!r}")

    # Convert to a Hugging Face dataset.
    ds = Dataset.from_list(records)

    # Split into train / test subsets.
    return ds.train_test_split(test_size=test_size, seed=seed)




# def load_audio_data_from_csv(csv_path, data_dir):
#     data = []
#     df = pd.read_csv(csv_path, sep=",", header=0)
#     print(df.head())

#     for _, row in df.iterrows():
#         file_path = os.path.join(data_dir, row["dossier"])
#         label = row["emotion"]

#         if os.path.exists(file_path) and label in LABELS:
#             data.append({"path": file_path, "label": LABELS[label]})
#         else:
#             print(f"⚠️ Fichier manquant ou label inconnu : {file_path} - {label}")

#     return Dataset.from_list(data)

# # Load the dataset from the CSV
# csv_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "new_data", "dataset.csv"))
# data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "new_data"))
# ds = load_audio_data_from_csv(csv_path, data_dir)