Dpngtm committed on
Commit
e111c36
·
verified ·
1 Parent(s): bed343f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
4
+ import torchaudio
5
+
6
# Hugging Face Hub id of the checkpoint to serve (replace with your own
# model's Hub path if you are reusing this app).
model_name = "Dpngtm/wave2vec2-emotion-recognition"

# Fetch the classification model and its matching processor from the Hub.
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
processor = Wav2Vec2Processor.from_pretrained(model_name)

# Run on the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)
14
+
15
+ # Preprocessing and inference function
16
def recognize_emotion(audio):
    """Predict the emotion expressed in an audio clip.

    Args:
        audio: Path to an audio file, as delivered by the Gradio ``Audio``
            input configured with ``type="filepath"``. ``None`` or an empty
            path means no audio was supplied.

    Returns:
        The predicted emotion label as a string, or a short human-readable
        message when there is no audio to analyse.
    """
    # The UI hands over None when the user submits without recording
    # anything; answer with a message instead of failing inside
    # torchaudio.load().
    if not audio:
        return "No audio provided. Please record a clip and try again."

    target_rate = 16000  # sampling rate passed to the processor below

    # Load the clip; bail out early on a zero-length recording.
    speech_array, sampling_rate = torchaudio.load(audio)
    if speech_array.numel() == 0:
        return "The recording is empty. Please record a clip and try again."

    # Resample to 16 kHz when the clip was captured at another rate.
    if sampling_rate != target_rate:
        resampler = torchaudio.transforms.Resample(sampling_rate, target_rate)
        speech_array = resampler(speech_array)

    # Average over the first dimension to collapse multi-channel audio to mono.
    speech_array = speech_array.mean(dim=0).numpy()

    # Build model inputs, move them to the model's device, and run inference
    # without tracking gradients.
    inputs = processor(
        speech_array,
        sampling_rate=target_rate,
        return_tensors="pt",
        padding=True,
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_id = torch.argmax(logits, dim=-1).item()

    # Label order must match the class indices used during training.
    emotion_labels = [
        "angry",
        "calm",
        "disgust",
        "fearful",
        "happy",
        "neutral",
        "sad",
        "surprised",
    ]
    return emotion_labels[predicted_id]
35
+
36
# Gradio interface: microphone recording in, predicted emotion label out.
# NOTE(review): `source=` is the Gradio 3.x keyword; newer Gradio releases
# expect `sources=[...]` instead — confirm against the pinned Gradio version.
interface = gr.Interface(
    fn=recognize_emotion,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title="Emotion Recognition with Wav2Vec2",
    # The input is microphone-only, so the text must not promise file upload.
    description="Record audio, and the model will predict the emotion.",
)

# Launch the app
interface.launch()