fast model
Marina Kpamegan committed
Commit 103eb2f · 1 Parent(s): 1534a11
Files changed:
- app.py (+5 -0)
- src/data/joie/.DS_Store (+0 -0)
- src/model/feature_extractor.py (+1 -1)
- src/predict.py (+5 -3)
- src/train.py (+20 -26)
- src/utils/dataset.py (+14 -1)
- src/utils/preprocessing.py (+4 -4)
app.py CHANGED

@@ -3,6 +3,11 @@ from streamlit_option_menu import option_menu
 from views.studio import studio
 from views.emotion_analysis import emotion_analysis
 from views.about import about
+import os
+import sys
+
+sys.path.append(os.path.abspath("src"))
+sys.path.append(os.path.abspath("."))
 
 if "model_loaded" not in st.session_state:
     st.session_state.model_loaded = None
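The two sys.path.append calls exist so that both import styles used in this repo ("from config import ..." and "from src.config import ...") resolve when Streamlit runs app.py from the repository root. A minimal sketch of the effect, assuming that layout:

```python
# Sketch only: assumes app.py sits at the repo root with a src/ package beside it.
import os
import sys

sys.path.append(os.path.abspath("src"))  # makes bare "config", "utils.*", "model.*" importable
sys.path.append(os.path.abspath("."))    # makes "src.config", "src.utils.*", "src.model.*" importable

# from src.config import DEVICE   # resolves via "."
# from config import LABELS       # resolves via "src"
```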
src/data/joie/.DS_Store DELETED
Binary file (6.15 kB)
src/model/feature_extractor.py CHANGED

@@ -1,6 +1,6 @@
 import torch
 from transformers import Wav2Vec2Model, Wav2Vec2Processor
-from config import MODEL_NAME, DEVICE
+from src.config import MODEL_NAME, DEVICE
 
 processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
 feature_extractor = Wav2Vec2Model.from_pretrained(MODEL_NAME).to(DEVICE)
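For context, this module builds a Wav2Vec2 processor/encoder pair from MODEL_NAME, which is defined in src/config and not shown in the diff. A hypothetical usage sketch, with "facebook/wav2vec2-base-960h" standing in for the real MODEL_NAME:

```python
# Hypothetical usage sketch; MODEL_NAME and DEVICE come from src/config (not shown),
# so the values below are placeholders, not the repo's actual configuration.
import numpy as np
import torch
from transformers import Wav2Vec2Model, Wav2Vec2Processor

MODEL_NAME = "facebook/wav2vec2-base-960h"  # assumption
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
feature_extractor = Wav2Vec2Model.from_pretrained(MODEL_NAME).to(DEVICE)

audio = np.zeros(16_000, dtype=np.float32)  # 1 second of silence at 16 kHz
inputs = processor(audio, sampling_rate=16_000, return_tensors="pt").input_values.to(DEVICE)
with torch.no_grad():
    hidden = feature_extractor(inputs).last_hidden_state  # (1, frames, 768) for the base model
```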
src/predict.py CHANGED

@@ -1,9 +1,11 @@
+import sys
+import os
 import torch
 import librosa
 import numpy as np
-from model.emotion_classifier import EmotionClassifier
-from utils.preprocessing import collate_fn
-from config import DEVICE, NUM_LABELS
+from src.model.emotion_classifier import EmotionClassifier
+from src.utils.preprocessing import collate_fn
+from src.config import DEVICE, NUM_LABELS
 import os
 
 # Charger le modèle entraîné
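Only the import block of predict.py appears in this diff. Based on the pieces visible elsewhere in the commit (EmotionClassifier(feature_dim=40, num_labels=NUM_LABELS) and the "best_model.pth" checkpoint written by train.py), its loading step presumably looks roughly like the sketch below; the actual file may differ.

```python
# Sketch of the model-loading step, inferred from other files in this commit;
# not a copy of the real predict.py.
import torch
from src.model.emotion_classifier import EmotionClassifier
from src.config import DEVICE, NUM_LABELS

model = EmotionClassifier(feature_dim=40, num_labels=NUM_LABELS).to(DEVICE)
model.load_state_dict(torch.load("best_model.pth", map_location=DEVICE))
model.eval()
```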
src/train.py CHANGED

@@ -2,37 +2,31 @@ import torch
 import torch.optim as optim
 import torch.nn as nn
 from torch.utils.data import DataLoader
-import numpy as np
 from sklearn.metrics import accuracy_score
 from utils.dataset import load_audio_data
 from utils.preprocessing import preprocess_audio, prepare_features, collate_fn
 from model.emotion_classifier import EmotionClassifier
-from config import DEVICE, NUM_LABELS
+from src.config import DEVICE, NUM_LABELS
 import os
 
-# …
+# Charger les données et les séparer en train / test
 data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
 ds = load_audio_data(data_dir)
 
-# …
-ds = ds.map(preprocess_audio)
-ds = ds.map(lambda batch: prepare_features(batch, max_length=128))
+# Prétraitement
+ds["train"] = ds["train"].map(preprocess_audio).map(lambda batch: prepare_features(batch, max_length=128))
+ds["test"] = ds["test"].map(preprocess_audio).map(lambda batch: prepare_features(batch, max_length=128))
 
-# …
-…
-…
+# DataLoader
+train_loader = DataLoader(ds["train"], batch_size=8, shuffle=True, collate_fn=collate_fn)
+test_loader = DataLoader(ds["test"], batch_size=8, shuffle=False, collate_fn=collate_fn)
 
-# …
-…
-test_loader = DataLoader(test_ds, batch_size=8, shuffle=False, collate_fn=collate_fn)
+# Instancier le modèle
+classifier = EmotionClassifier(feature_dim=40, num_labels=NUM_LABELS).to(DEVICE)
 
-# …
-…
-…
-…
-# 🔹 Fonction d'entraînement
-def train_classifier(classifier, train_loader, test_loader, epochs=20, lr=2e-4):
-    optimizer = optim.AdamW(classifier.parameters(), lr=lr, weight_decay=0.01)
+# Fonction d'entraînement
+def train_classifier(classifier, train_loader, test_loader, epochs=20):
+    optimizer = optim.AdamW(classifier.parameters(), lr=2e-5, weight_decay=0.01)
     loss_fn = nn.CrossEntropyLoss()
     best_accuracy = 0.0

@@ -55,17 +49,16 @@ def train_classifier(classifier, train_loader, test_loader, epochs=20, lr=2e-4):
 
         train_acc = correct / len(train_loader.dataset)
 
-        # 🔹 Sauvegarde du meilleur modèle
         if train_acc > best_accuracy:
             best_accuracy = train_acc
-            torch.save(classifier.state_dict(), "…
-            print(f"…
+            torch.save(classifier.state_dict(), "best_model.pth")
+            print(f"✔️ Nouveau meilleur modèle sauvegardé ! Accuracy: {best_accuracy:.4f}")
 
-        print(f"…
+        print(f"📢 Epoch {epoch+1}/{epochs} - Loss: {total_loss:.4f} - Accuracy: {train_acc:.4f}")
 
     return classifier
 
-# …
+# Évaluer le modèle
 def evaluate(model, test_loader):
     model.eval()
     all_preds, all_labels = [], []

@@ -73,6 +66,7 @@ def evaluate(model, test_loader):
     with torch.no_grad():
         for inputs, labels in test_loader:
             inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
+
             logits = model(inputs)
             preds = torch.argmax(logits, dim=-1).cpu().numpy()
             all_preds.extend(preds)

@@ -80,7 +74,7 @@ def evaluate(model, test_loader):
 
     return accuracy_score(all_labels, all_preds)
 
-# …
-trained_classifier = train_classifier(classifier, train_loader, test_loader, epochs=20…
+# Lancer l'entraînement
+trained_classifier = train_classifier(classifier, train_loader, test_loader, epochs=20)
 
 print("✅ Entraînement terminé, le meilleur modèle a été sauvegardé !")
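The body of the epoch loop falls between the first two hunks and is not shown in the diff. A self-contained mini version that mirrors the visible pieces (AdamW with lr=2e-5 and weight_decay=0.01, CrossEntropyLoss, accuracy tracking, best-checkpoint saving) on random MFCC-shaped tensors is sketched below; it is illustrative, not the commit's actual loop.

```python
# Runnable sketch of the training pattern; EmotionClassifier and the real data are
# replaced by a plain linear model and random (batch, 128, 40) tensors.
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
NUM_LABELS = 7  # assumption; the real value lives in src/config

X = torch.randn(64, 128, 40)                       # 64 fake clips, 128 frames, 40 MFCCs
y = torch.randint(0, NUM_LABELS, (64,))
train_loader = DataLoader(TensorDataset(X, y), batch_size=8, shuffle=True)

classifier = nn.Sequential(nn.Flatten(), nn.Linear(128 * 40, NUM_LABELS)).to(DEVICE)
optimizer = optim.AdamW(classifier.parameters(), lr=2e-5, weight_decay=0.01)
loss_fn = nn.CrossEntropyLoss()
best_accuracy = 0.0

for epoch in range(3):
    total_loss, correct = 0.0, 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        logits = classifier(inputs)
        loss = loss_fn(logits, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        correct += (torch.argmax(logits, dim=-1) == labels).sum().item()
    train_acc = correct / len(train_loader.dataset)
    if train_acc > best_accuracy:
        best_accuracy = train_acc
        torch.save(classifier.state_dict(), "best_model.pth")  # keep only the best epoch
    print(f"Epoch {epoch+1} - Loss: {total_loss:.4f} - Accuracy: {train_acc:.4f}")
```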
src/utils/dataset.py CHANGED

@@ -3,6 +3,11 @@ from datasets import Dataset
 from config import LABELS
 import pandas as pd
 
+import os
+from datasets import Dataset, DatasetDict
+import pandas as pd
+from config import LABELS
+
 def load_audio_data(data_dir):
     data = []
     for label_name, label_id in LABELS.items():

@@ -11,7 +16,15 @@ def load_audio_data(data_dir):
             if file.endswith(".wav"):
                 file_path = os.path.join(label_dir, file)
                 data.append({"path": file_path, "label": label_id})
-    …
+
+    # Convertir en dataset Hugging Face
+    ds = Dataset.from_list(data)
+
+    # Séparer en 80% train / 20% test
+    ds = ds.train_test_split(test_size=0.2)
+    return ds  # Contient ds["train"] et ds["test"]
+
+
 
 
 # def load_audio_data_from_csv(csv_path, data_dir):
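The new split logic relies on the datasets library: Dataset.train_test_split returns a DatasetDict with "train" and "test" keys, which is what train.py then indexes. A quick stand-alone check with fake paths:

```python
# Minimal check of the split behaviour used above; the file paths are placeholders.
from datasets import Dataset

data = [{"path": f"clip_{i}.wav", "label": i % 3} for i in range(10)]
ds = Dataset.from_list(data)           # convert the list of dicts to a Hugging Face Dataset
ds = ds.train_test_split(test_size=0.2)  # 80% train / 20% test

print(ds["train"].num_rows, ds["test"].num_rows)  # 8 2
```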
src/utils/preprocessing.py CHANGED

@@ -3,8 +3,8 @@ import soundfile as sf
 import torch
 import torchaudio
 import numpy as np
-from model.feature_extractor import processor  # type: ignore
-from config import DEVICE
+from src.model.feature_extractor import processor  # type: ignore
+from src.config import DEVICE
 
 # Resampler pour convertir en 16kHz
 resampler = torchaudio.transforms.Resample(orig_freq=48_000, new_freq=16_000)

@@ -43,7 +43,7 @@ def prepare_features(batch, max_length):
     mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
 
     # Debugging: afficher la forme des MFCCs
-    print(f"MFCC original shape: {mfcc.shape}")
+    # print(f"MFCC original shape: {mfcc.shape}")
 
     # Ajuster la longueur des MFCCs
     if mfcc.shape[1] > max_length:

@@ -52,7 +52,7 @@ def prepare_features(batch, max_length):
         pad_width = max_length - mfcc.shape[1]
         mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')  # Padding si trop court
 
-    print(f"MFCC padded shape: {mfcc.shape}")
+    # print(f"MFCC padded shape: {mfcc.shape}")
 
     # Convertir en tensor PyTorch et stocker
     batch["input_values"] = torch.tensor(mfcc.T, dtype=torch.float32)  # Transposer pour obtenir (max_length, 40)
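Together, the padding branch shown above and the truncation branch behind the if condition force every MFCC matrix to exactly max_length frames before it is transposed to (max_length, 40). A stand-alone version of that step; the truncation line is an assumption, since only the condition appears in the diff:

```python
# Sketch of the fixed-length MFCC step in prepare_features; the truncation branch
# is assumed, the padding and transpose match the lines visible in the diff.
import numpy as np
import torch

def fix_length(mfcc: np.ndarray, max_length: int = 128) -> torch.Tensor:
    if mfcc.shape[1] > max_length:
        mfcc = mfcc[:, :max_length]                                        # assumed: drop extra frames
    else:
        pad_width = max_length - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode="constant")  # zero-pad short clips
    return torch.tensor(mfcc.T, dtype=torch.float32)                       # (max_length, n_mfcc)

print(fix_length(np.random.randn(40, 97)).shape)  # torch.Size([128, 40])
```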