Boltz79 committed
Commit 9729a4f · verified · 1 Parent(s): 6f98b5f

Update app.py

Files changed (1): app.py +9 -6
app.py CHANGED
@@ -9,6 +9,7 @@ from speechbrain.inference.interfaces import foreign_class
 import io
 import matplotlib.pyplot as plt
 import librosa.display
+from PIL import Image  # Added for image conversion
 
 # Try to import noisereduce (if not available, noise reduction will be skipped)
 try:
@@ -103,7 +104,7 @@ def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False,
     Main prediction function:
       - Uses ensemble prediction if enabled.
      - Otherwise, processes the entire audio at once.
-    Returns the emotion label enhanced with an emoji.
+    Returns the emotion label enhanced with an emoji.
     """
     try:
         if use_ensemble:
@@ -118,7 +119,7 @@ def predict_emotion(audio_file, use_ensemble=False, apply_noise_reduction=False,
 
 def plot_waveform(audio_file):
     """
-    Generate and return a waveform plot image for the given audio file.
+    Generate and return a waveform plot image (as a PIL Image) for the given audio file.
     """
     y, sr = librosa.load(audio_file, sr=16000, mono=True)
     plt.figure(figsize=(10, 3))
@@ -128,12 +129,14 @@ def plot_waveform(audio_file):
     plt.savefig(buf, format="png")
     plt.close()
     buf.seek(0)
-    return buf.read()
+    # Convert buffer to PIL Image
+    image = Image.open(buf)
+    return image
 
 def predict_and_plot(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap):
     """
     Run emotion prediction and generate a waveform plot.
-    Returns a tuple: (emotion label with emoji, waveform image).
+    Returns a tuple: (emotion label with emoji, waveform image as a PIL Image).
     """
     emotion = predict_emotion(audio_file, use_ensemble, apply_noise_reduction, segment_duration, overlap)
     waveform = plot_waveform(audio_file)
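
The pattern this hunk adopts (render a Matplotlib figure to an in-memory PNG, then wrap it as a PIL Image) can be exercised outside the app. Below is a minimal standalone sketch, not the app's exact code: the `waveshow` call is an assumption since the plotting line is not shown in the diff (librosa ≥ 0.9 names it `waveshow`; older releases use `waveplot`), `image.load()` is an extra safeguard the commit does not include, and the file names in the usage comment are placeholders.

import io

import librosa
import librosa.display
import matplotlib
matplotlib.use("Agg")  # headless backend, safe on a server
import matplotlib.pyplot as plt
from PIL import Image

def plot_waveform(audio_file):
    """Render the waveform of audio_file and return it as a PIL Image."""
    y, sr = librosa.load(audio_file, sr=16000, mono=True)
    plt.figure(figsize=(10, 3))
    librosa.display.waveshow(y, sr=sr)  # assumed plotting call; not shown in the diff
    buf = io.BytesIO()
    plt.savefig(buf, format="png")
    plt.close()
    buf.seek(0)
    image = Image.open(buf)  # lazy: PIL only reads headers here
    image.load()             # force a full decode while buf is still in scope
    return image

# Hypothetical usage with placeholder file names:
# plot_waveform("sample.wav").save("waveform.png")

The old `return buf.read()` handed Gradio raw PNG bytes, which a `gr.Image` output component does not accept; a PIL Image (or a NumPy array or file path) is what it expects.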
@@ -151,7 +154,6 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
     with gr.Tabs():
         with gr.TabItem("Emotion Recognition"):
             with gr.Row():
-                # 'source' argument removed to avoid errors
                 audio_input = gr.Audio(type="filepath", label="Upload Audio")
                 use_ensemble = gr.Checkbox(label="Use Ensemble Prediction (for long audio)", value=False)
                 apply_noise_reduction = gr.Checkbox(label="Apply Noise Reduction", value=False)
@@ -160,7 +162,8 @@ with gr.Blocks(css=".gradio-container {background-color: #f7f7f7; font-family: A
                 overlap = gr.Slider(minimum=0.0, maximum=5.0, step=0.5, value=1.0, label="Segment Overlap (s)")
                 predict_button = gr.Button("Predict Emotion")
                 result_text = gr.Textbox(label="Predicted Emotion")
-                waveform_image = gr.Image(label="Audio Waveform", type="auto")
+                # Set type to "pil" since we are returning a PIL Image
+                waveform_image = gr.Image(label="Audio Waveform", type="pil")
 
                 predict_button.click(
                     predict_and_plot,
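
On the component side of the same change: recent Gradio releases accept only "numpy", "pil", or "filepath" for the `type` of `gr.Image`, so "auto" raises an error at construction time, which is presumably what this commit fixes. A pared-down sketch of the wiring, using a hypothetical `show_waveform` stand-in for the app's `predict_and_plot`:

import gradio as gr

def show_waveform(audio_file):
    # Stand-in for the app's predict_and_plot; any function returning a
    # PIL Image can feed a gr.Image output component.
    return plot_waveform(audio_file)  # plot_waveform as sketched above

with gr.Blocks() as demo:
    audio_input = gr.Audio(type="filepath", label="Upload Audio")
    plot_button = gr.Button("Plot Waveform")
    waveform_image = gr.Image(label="Audio Waveform", type="pil")
    plot_button.click(show_waveform, inputs=audio_input, outputs=waveform_image)

demo.launch()

Note that `type` on `gr.Image` mainly governs how *input* images are handed to the wrapped function; for a pure output component like this one, Gradio infers the format from the returned object, so the practical effect of the change is replacing the invalid "auto" with an accepted value.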
 