Boltz79 committed
Commit 9729a4f · verified · 1 Parent(s): 6f98b5f

Update app.py

Files changed (1): app.py +9 -6
app.py CHANGED
@@ -9,6 +9,7 @@ from speechbrain.inference.interfaces import foreign_class
 import io
 import matplotlib.pyplot as plt
 import librosa.display
+from PIL import Image  # Added for image conversion
 
 # Try to import noisereduce (if not available, noise reduction will be skipped)
 try:
@@ -103,7 +104,7 @@ def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False,
     Main prediction function:
       - Uses ensemble prediction if enabled.
      - Otherwise, processes the entire audio at once.
-    Returns the emotion label enhanced with an emoji.
+    Returns the emotion label enhanced with an emoji.
     """
     try:
         if use_ensemble:
@@ -118,7 +119,7 @@ def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False,
 
 def plot_waveform(audio_file):
     """
-    Generate and return a waveform plot image for the given audio file.
+    Generate and return a waveform plot image (as a PIL Image) for the given audio file.
     """
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     plt.figure(figsize=(10, 3))
@@ -128,12 +129,14 @@ def plot_waveform(audio_file):
     plt.savefig(buf, format="png")
     plt.close()
     buf.seek(0)
-    return buf.read()
+    # Convert buffer to PIL Image
+    image = Image.open(buf)
+    return image
 
 def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
     """
     Run emotion prediction and generate a waveform plot.
-    Returns a tuple: (emotion label with emoji, waveform image).
+    Returns a tuple: (emotion label with emoji, waveform image as a PIL Image).
     """
     emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
     waveform = plot_waveform(audio_file)
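
The pattern this hunk adopts (render a Matplotlib figure to an in-memory PNG, then wrap it as a PIL Image) can be exercised outside the app. Below is a minimal standalone sketch, not the app's exact code: the `waveshow` call is an assumption since the plotting line is not shown in the diff (librosa ≥ 0.9 names it `waveshow`; older releases use `waveplot`), `image.load()` is an extra safeguard the commit does not include, and the file names in the usage comment are placeholders.

import io

import librosa
import librosa.display
import matplotlib
matplotlib.use("Agg")  # headless backend, safe on a server
import matplotlib.pyplot as plt
from PIL import Image

def plot_waveform(audio_file):
    """Render the waveform of audio_file and return it as a PIL Image."""
    y, sr = librosa.load(audio_file, sr=16000, mono=True)
    plt.figure(figsize=(10, 3))
    librosa.display.waveshow(y, sr=sr)  # assumed plotting call; not shown in the diff
    buf = io.BytesIO()
    plt.savefig(buf, format="png")
    plt.close()
    buf.seek(0)
    image = Image.open(buf)  # lazy: PIL only reads headers here
    image.load()             # force a full decode while buf is still in scope
    return image

# Hypothetical usage with placeholder file names:
# plot_waveform("sample.wav").save("waveform.png")

The old `return buf.read()` handed Gradio raw PNG bytes, which a `gr.Image` output component does not accept; a PIL Image (or a NumPy array or file path) is what it expects.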
@@ -151,7 +154,6 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
     with gr.Tabs():
         with gr.TabItem("Emotion Recognition"):
             with gr.Row():
-                # 'source' argument removed to avoid errors
                 audio_input = gr.Audio(type="filepath", label="Upload Audio")
                 use_ensemble = gr.Checkbox(label="Use Ensemble Prediction (for long audio)", value=False)
                 apply_noise_reduction = gr.Checkbox(label="Apply Noise Reduction", value=False)
@@ -160,7 +162,8 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
                 overlap = gr.Slider(minimum=0.0, maximum=5.0, step=0.5, value=1.0, label="Segment Overlap (s)")
                 predict_button = gr.Button("Predict Emotion")
                 result_text = gr.Textbox(label="Predicted Emotion")
-                waveform_image = gr.Image(label="Audio Waveform", type="auto")
+                # Set type to "pil" since we are returning a PIL Image
+                waveform_image = gr.Image(label="Audio Waveform", type="pil")
 
                 predict_button.click(
                     predict_and_plot,
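
On the component side of the same change: recent Gradio releases accept only "numpy", "pil", or "filepath" for the `type` of `gr.Image`, so "auto" raises an error at construction time, which is presumably what this commit fixes. A pared-down sketch of the wiring, using a hypothetical `show_waveform` stand-in for the app's `predict_and_plot`:

import gradio as gr

def show_waveform(audio_file):
    # Stand-in for the app's predict_and_plot; any function returning a
    # PIL Image can feed a gr.Image output component.
    return plot_waveform(audio_file)  # plot_waveform as sketched above

with gr.Blocks() as demo:
    audio_input = gr.Audio(type="filepath", label="Upload Audio")
    plot_button = gr.Button("Plot Waveform")
    waveform_image = gr.Image(label="Audio Waveform", type="pil")
    plot_button.click(show_waveform, inputs=audio_input, outputs=waveform_image)

demo.launch()

Note that `type` on `gr.Image` mainly governs how *input* images are handed to the wrapped function; for a pure output component like this one, Gradio infers the format from the returned object, so the practical effect of the change is replacing the invalid "auto" with an accepted value.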
 