Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,7 @@ from speechbrain.inference.interfaces import foreign_class
|
|
9 |
import io
|
10 |
import matplotlib.pyplot as plt
|
11 |
import librosa.display
|
|
|
12 |
|
13 |
# Try to import noisereduce (if not available, noise reduction will be skipped)
|
14 |
try:
|
@@ -103,7 +104,7 @@ def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False,
|
|
103 |
Main prediction function:
|
104 |
- Uses ensemble prediction if enabled.
|
105 |
- Otherwise, processes the entire audio at once.
|
106 |
-
|
107 |
"""
|
108 |
try:
|
109 |
if use_ensemble:
|
@@ -118,7 +119,7 @@ def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False,
|
|
118 |
|
119 |
def plot_waveform(audio_file):
|
120 |
"""
|
121 |
-
Generate and return a waveform plot image for the given audio file.
|
122 |
"""
|
123 |
y, sr = librosa.load(audio_file, sr=16000, mono=True)
|
124 |
plt.figure(figsize=(10, 3))
|
@@ -128,12 +129,14 @@ def plot_waveform(audio_file):
|
|
128 |
plt.savefig(buf, format="png")
|
129 |
plt.close()
|
130 |
buf.seek(0)
|
131 |
-
|
|
|
|
|
132 |
|
133 |
def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
|
134 |
"""
|
135 |
Run emotion prediction and generate a waveform plot.
|
136 |
-
Returns a tuple: (emotion label with emoji, waveform image).
|
137 |
"""
|
138 |
emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
|
139 |
waveform = plot_waveform(audio_file)
|
@@ -151,7 +154,6 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
|
|
151 |
with gr.Tabs():
|
152 |
with gr.TabItem("Emotion Recognition"):
|
153 |
with gr.Row():
|
154 |
-
# 'source' argument removed to avoid errors
|
155 |
audio_input = gr.Audio(type="filepath", label="Upload Audio")
|
156 |
use_ensemble = gr.Checkbox(label="Use Ensemble Prediction (for long audio)", value=False)
|
157 |
apply_noise_reduction = gr.Checkbox(label="Apply Noise Reduction", value=False)
|
@@ -160,7 +162,8 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
|
|
160 |
overlap = gr.Slider(minimum=0.0, maximum=5.0, step=0.5, value=1.0, label="Segment Overlap (s)")
|
161 |
predict_button = gr.Button("Predict Emotion")
|
162 |
result_text = gr.Textbox(label="Predicted Emotion")
|
163 |
-
|
|
|
164 |
|
165 |
predict_button.click(
|
166 |
predict_and_plot,
|
|
|
9 |
import io
|
10 |
import matplotlib.pyplot as plt
|
11 |
import librosa.display
|
12 |
+
from PIL import Image # Added for image conversion
|
13 |
|
14 |
# Try to import noisereduce (if not available, noise reduction will be skipped)
|
15 |
try:
|
|
|
104 |
Main prediction function:
|
105 |
- Uses ensemble prediction if enabled.
|
106 |
- Otherwise, processes the entire audio at once.
|
107 |
+
Returns the emotion label enhanced with an emoji.
|
108 |
"""
|
109 |
try:
|
110 |
if use_ensemble:
|
|
|
119 |
|
120 |
def plot_waveform(audio_file):
|
121 |
"""
|
122 |
+
Generate and return a waveform plot image (as a PIL Image) for the given audio file.
|
123 |
"""
|
124 |
y, sr = librosa.load(audio_file, sr=16000, mono=True)
|
125 |
plt.figure(figsize=(10, 3))
|
|
|
129 |
plt.savefig(buf, format="png")
|
130 |
plt.close()
|
131 |
buf.seek(0)
|
132 |
+
# Convert buffer to PIL Image
|
133 |
+
image = Image.open(buf)
|
134 |
+
return image
|
135 |
|
136 |
def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
|
137 |
"""
|
138 |
Run emotion prediction and generate a waveform plot.
|
139 |
+
Returns a tuple: (emotion label with emoji, waveform image as a PIL Image).
|
140 |
"""
|
141 |
emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
|
142 |
waveform = plot_waveform(audio_file)
|
|
|
154 |
with gr.Tabs():
|
155 |
with gr.TabItem("Emotion Recognition"):
|
156 |
with gr.Row():
|
|
|
157 |
audio_input = gr.Audio(type="filepath", label="Upload Audio")
|
158 |
use_ensemble = gr.Checkbox(label="Use Ensemble Prediction (for long audio)", value=False)
|
159 |
apply_noise_reduction = gr.Checkbox(label="Apply Noise Reduction", value=False)
|
|
|
162 |
overlap = gr.Slider(minimum=0.0, maximum=5.0, step=0.5, value=1.0, label="Segment Overlap (s)")
|
163 |
predict_button = gr.Button("Predict Emotion")
|
164 |
result_text = gr.Textbox(label="Predicted Emotion")
|
165 |
+
# Waveform output; a returned PIL Image is displayed directly (type="pil" only governs images passed *into* a handler)
|
166 |
+
waveform_image = gr.Image(label="Audio Waveform", type="pil")
|
167 |
|
168 |
predict_button.click(
|
169 |
predict_and_plot,
|