Marina Kpamegan committed
Commit 103eb2f · 1 Parent(s): 1534a11

fast model

app.py CHANGED
@@ -3,6 +3,11 @@ from streamlit_option_menu import option_menu
 from views.studio import studio
 from views.emotion_analysis import emotion_analysis
 from views.about import about
+import os
+import sys
+
+sys.path.append(os.path.abspath("src"))
+sys.path.append(os.path.abspath("."))
 
 if "model_loaded" not in st.session_state:
     st.session_state.model_loaded = None
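
The two sys.path.append calls are what let the absolute imports introduced in this commit (from src.config import ..., from src.model.* import ...) coexist with the older bare imports (from config import ...): the "src" entry resolves bare module names inside src/, while the "." entry makes src itself importable as a package. src/config.py is not part of this commit; the sketch below only reflects the names other files import from it, and every value is an assumption:

# src/config.py — hypothetical sketch; only the imported names are confirmed
import torch

MODEL_NAME = "facebook/wav2vec2-base"  # assumed checkpoint, not stated in the diff
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Assumed label map; only a "joie" (joy) class is evidenced by src/data/joie/ below
LABELS = {"colere": 0, "joie": 1, "tristesse": 2}
NUM_LABELS = len(LABELS)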
src/data/joie/.DS_Store DELETED
Binary file (6.15 kB)
 
src/model/feature_extractor.py CHANGED
@@ -1,6 +1,6 @@
 import torch
 from transformers import Wav2Vec2Model, Wav2Vec2Processor
-from config import MODEL_NAME, DEVICE
+from src.config import MODEL_NAME, DEVICE
 
 processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
 feature_extractor = Wav2Vec2Model.from_pretrained(MODEL_NAME).to(DEVICE)
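
With the corrected import path, this module loads as src.model.feature_extractor from the repo root. For reference, a hedged sketch of how the processor/feature_extractor pair defined above is typically used on a 16 kHz waveform (the dummy input and shapes are illustrative, not part of the commit):

# Illustrative usage only
import numpy as np

waveform = np.zeros(16_000, dtype=np.float32)  # one second of dummy audio at 16 kHz
inputs = processor(waveform, sampling_rate=16_000, return_tensors="pt").input_values.to(DEVICE)
with torch.no_grad():
    features = feature_extractor(inputs).last_hidden_state  # (1, n_frames, hidden_size)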
src/predict.py CHANGED
@@ -1,9 +1,11 @@
+import sys
+import os
 import torch
 import librosa
 import numpy as np
-from model.emotion_classifier import EmotionClassifier
-from utils.preprocessing import collate_fn
-from config import DEVICE, NUM_LABELS
+from src.model.emotion_classifier import EmotionClassifier
+from src.utils.preprocessing import collate_fn
+from src.config import DEVICE, NUM_LABELS
 import os
 
 # Load the trained model
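
The hunk ends at the comment above, so the actual loading code is not visible here. Given that train.py (below) saves the weights with torch.save(classifier.state_dict(), "best_model.pth"), the continuation presumably resembles this sketch (file name and arguments inferred from train.py, not confirmed):

# Hypothetical continuation of predict.py
model = EmotionClassifier(feature_dim=40, num_labels=NUM_LABELS).to(DEVICE)
model.load_state_dict(torch.load("best_model.pth", map_location=DEVICE))
model.eval()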
src/train.py CHANGED
@@ -2,37 +2,31 @@ import torch
 import torch.optim as optim
 import torch.nn as nn
 from torch.utils.data import DataLoader
-import numpy as np
 from sklearn.metrics import accuracy_score
 from utils.dataset import load_audio_data
 from utils.preprocessing import preprocess_audio, prepare_features, collate_fn
 from model.emotion_classifier import EmotionClassifier
-from config import DEVICE, NUM_LABELS
+from src.config import DEVICE, NUM_LABELS
 import os
 
-# 🔹 Load the data
+# Load the data and split into train / test
 data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
 ds = load_audio_data(data_dir)
 
-# 🔹 Preprocess the data
-ds = ds.map(preprocess_audio)
-ds = ds.map(lambda batch: prepare_features(batch, max_length=128))
+# Preprocessing
+ds["train"] = ds["train"].map(preprocess_audio).map(lambda batch: prepare_features(batch, max_length=128))
+ds["test"] = ds["test"].map(preprocess_audio).map(lambda batch: prepare_features(batch, max_length=128))
 
-# 🔹 Split into train and test
-ds = ds.train_test_split(test_size=0.2)
-train_ds, test_ds = ds["train"], ds["test"]
+# DataLoader
+train_loader = DataLoader(ds["train"], batch_size=8, shuffle=True, collate_fn=collate_fn)
+test_loader = DataLoader(ds["test"], batch_size=8, shuffle=False, collate_fn=collate_fn)
 
-# 🔹 Create the DataLoaders with `collate_fn`
-train_loader = DataLoader(train_ds, batch_size=8, shuffle=True, collate_fn=collate_fn)
-test_loader = DataLoader(test_ds, batch_size=8, shuffle=False, collate_fn=collate_fn)
+# Instantiate the model
+classifier = EmotionClassifier(feature_dim=40, num_labels=NUM_LABELS).to(DEVICE)
 
-# 🔹 Instantiate the model
-feature_dim = 40  # Number of MFCCs
-classifier = EmotionClassifier(feature_dim, NUM_LABELS).to(DEVICE)
-
-# 🔹 Training function
-def train_classifier(classifier, train_loader, test_loader, epochs=20, lr=2e-4):
-    optimizer = optim.AdamW(classifier.parameters(), lr=lr, weight_decay=0.01)
+# Training function
+def train_classifier(classifier, train_loader, test_loader, epochs=20):
+    optimizer = optim.AdamW(classifier.parameters(), lr=2e-5, weight_decay=0.01)
     loss_fn = nn.CrossEntropyLoss()
     best_accuracy = 0.0
 
@@ -55,17 +49,16 @@ def train_classifier(classifier, train_loader, test_loader, epochs=20, lr=2e-4):
 
         train_acc = correct / len(train_loader.dataset)
 
-        # 🔹 Save the best model
         if train_acc > best_accuracy:
             best_accuracy = train_acc
-            torch.save(classifier.state_dict(), "acc_model.pth")
-            print(f" New best model saved! Accuracy: {best_accuracy:.4f}")
+            torch.save(classifier.state_dict(), "best_model.pth")
+            print(f"✔️ New best model saved! Accuracy: {best_accuracy:.4f}")
 
-        print(f"📊 Epoch {epoch+1}/{epochs} - Loss: {total_loss/len(train_loader):.4f} - Accuracy: {train_acc:.4f}")
+        print(f"📢 Epoch {epoch+1}/{epochs} - Loss: {total_loss:.4f} - Accuracy: {train_acc:.4f}")
 
     return classifier
 
-# 🔹 Evaluation function
+# Evaluate the model
 def evaluate(model, test_loader):
     model.eval()
     all_preds, all_labels = [], []
@@ -73,6 +66,7 @@ def evaluate(model, test_loader):
     with torch.no_grad():
         for inputs, labels in test_loader:
             inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
+
             logits = model(inputs)
             preds = torch.argmax(logits, dim=-1).cpu().numpy()
             all_preds.extend(preds)
@@ -80,7 +74,7 @@ def evaluate(model, test_loader):
 
     return accuracy_score(all_labels, all_preds)
 
-# 🔹 Start training
-trained_classifier = train_classifier(classifier, train_loader, test_loader, epochs=20, lr=2e-4)
+# Start training
+trained_classifier = train_classifier(classifier, train_loader, test_loader, epochs=20)
 
 print("✅ Training finished, the best model has been saved!")
src/utils/dataset.py CHANGED
@@ -3,6 +3,11 @@ from datasets import Dataset
 from config import LABELS
 import pandas as pd
 
+import os
+from datasets import Dataset, DatasetDict
+import pandas as pd
+from config import LABELS
+
 def load_audio_data(data_dir):
     data = []
     for label_name, label_id in LABELS.items():
@@ -11,7 +16,15 @@ def load_audio_data(data_dir):
             if file.endswith(".wav"):
                 file_path = os.path.join(label_dir, file)
                 data.append({"path": file_path, "label": label_id})
-    return Dataset.from_list(data)
+
+    # Convert to a Hugging Face dataset
+    ds = Dataset.from_list(data)
+
+    # Split 80% train / 20% test
+    ds = ds.train_test_split(test_size=0.2)
+    return ds  # Contains ds["train"] and ds["test"]
+
+
 
 
 # def load_audio_data_from_csv(csv_path, data_dir):
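
Since load_audio_data now performs the 80/20 split itself, it returns a datasets.DatasetDict instead of a flat Dataset — which is exactly why train.py above switched from calling ds.train_test_split(...) to indexing ds["train"] and ds["test"] directly. Illustrative usage (the path mirrors train.py's data_dir; the printed record is an example):

ds = load_audio_data("src/data")
print(ds)              # DatasetDict with "train" and "test" splits
print(ds["train"][0])  # e.g. {"path": ".../file.wav", "label": 1}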
src/utils/preprocessing.py CHANGED
@@ -3,8 +3,8 @@ import soundfile as sf
 import torch
 import torchaudio
 import numpy as np
-from model.feature_extractor import processor  # type: ignore
-from config import DEVICE
+from src.model.feature_extractor import processor  # type: ignore
+from src.config import DEVICE
 
 # Resampler to convert to 16 kHz
 resampler = torchaudio.transforms.Resample(orig_freq=48_000, new_freq=16_000)
@@ -43,7 +43,7 @@ def prepare_features(batch, max_length):
     mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
 
     # Debugging: print the MFCC shape
-    print(f"MFCC original shape: {mfcc.shape}")
+    # print(f"MFCC original shape: {mfcc.shape}")
 
     # Adjust the MFCC length
     if mfcc.shape[1] > max_length:
@@ -52,7 +52,7 @@ def prepare_features(batch, max_length):
         pad_width = max_length - mfcc.shape[1]
         mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')  # Pad if too short
 
-    print(f"MFCC padded shape: {mfcc.shape}")
+    # print(f"MFCC padded shape: {mfcc.shape}")
 
     # Convert to a PyTorch tensor and store
     batch["input_values"] = torch.tensor(mfcc.T, dtype=torch.float32)  # Transpose to get (max_length, 40)