personalize_music_app_speech

Running

App Files Files Community

Akshayram1 committed on Jan 8

Commit

32b0b9d

verified ·

1 Parent(s): d2afc1b

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -32

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import speech_recognition as sr
 import tempfile
 from pydub import AudioSegment
 import numpy as np
 # Load sentiment analysis model using PyTorch backend
 mood_classifier = pipeline("sentiment-analysis", framework="pt")
@@ -20,41 +21,32 @@ def detect_mood(text):
     else:
         return "neutral"
-def speech_to_text():
     # Initialize recognizer
     r = sr.Recognizer()
     # Create a temporary file to store the recorded audio
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
-        # Add audio recording widget
-        audio_bytes = st.audio_recorder(
-            text="Click to record your mood",
-            recording_color="#e8b62c",
-            neutral_color="#6aa36f"
-        )
-        if audio_bytes:
-            # Save audio bytes to temporary file
-            fp.write(audio_bytes)
-            temp_filename = fp.name
-            # Read the audio file
-            with sr.AudioFile(temp_filename) as source:
-                # Adjust for ambient noise and record
-                r.adjust_for_ambient_noise(source)
-                audio = r.record(source)
-                try:
-                    # Use Google Speech Recognition
-                    text = r.recognize_google(audio)
-                    return text
-                except sr.UnknownValueError:
-                    st.error("Could not understand the audio")
-                    return None
-                except sr.RequestError:
-                    st.error("Could not request results from speech recognition service")
-                    return None
-    return None
 def get_song_recommendations(mood, api_key):
     try:
@@ -107,11 +99,29 @@ if input_method == "Text":
 else:
     # Speech input
     st.write("📢 Tell me about your day...")
-    spoken_text = speech_to_text()
-    if spoken_text:
-        st.write(f"You said: {spoken_text}")
-        user_mood = detect_mood(spoken_text)
     else:
         user_mood = None

 import tempfile
 from pydub import AudioSegment
 import numpy as np
+from streamlit_webrtc import webrtc_streamer, WebRtcMode, AudioProcessorBase
 # Load sentiment analysis model using PyTorch backend
 mood_classifier = pipeline("sentiment-analysis", framework="pt")
     else:
         return "neutral"
+def speech_to_text(audio_bytes):
     # Initialize recognizer
     r = sr.Recognizer()
     # Create a temporary file to store the recorded audio
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
+        # Save audio bytes to temporary file
+        fp.write(audio_bytes)
+        temp_filename = fp.name
+        # Read the audio file
+        with sr.AudioFile(temp_filename) as source:
+            # Adjust for ambient noise and record
+            r.adjust_for_ambient_noise(source)
+            audio = r.record(source)
+            try:
+                # Use Google Speech Recognition
+                text = r.recognize_google(audio)
+                return text
+            except sr.UnknownValueError:
+                st.error("Could not understand the audio")
+                return None
+            except sr.RequestError:
+                st.error("Could not request results from speech recognition service")
+                return None
 def get_song_recommendations(mood, api_key):
     try:
 else:
     # Speech input
     st.write("📢 Tell me about your day...")
+    # Use streamlit-webrtc for audio recording
+    webrtc_ctx = webrtc_streamer(
+        key="speech-to-text",
+        mode=WebRtcMode.SENDONLY,
+        audio_receiver_size=1024,
+        media_stream_constraints={"audio": True, "video": False},
+    )
+    if webrtc_ctx.audio_receiver:
+        audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=5)
+        if audio_frames:
+            audio_bytes = b"".join([frame.to_ndarray().tobytes() for frame in audio_frames])
+            spoken_text = speech_to_text(audio_bytes)
+            if spoken_text:
+                st.write(f"You said: {spoken_text}")
+                user_mood = detect_mood(spoken_text)
+            else:
+                user_mood = None
+        else:
+            st.warning("No audio frames received. Please try again.")
+            user_mood = None
     else:
         user_mood = None