Voice-To-Text

Sleeping

App Files Community

arshadrana committed on Nov 8, 2024

Commit

a562e5f

verified ·

1 Parent(s): 5f94e97

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -18

app.py CHANGED Viewed

@@ -1,42 +1,40 @@
 import gradio as gr
 import speech_recognition as sr
 from pydub import AudioSegment
 import tempfile
 def transcribe_audio(audio_input):
     recognizer = sr.Recognizer()
-    # Ensure the input is a tuple and get the audio data bytes
     if isinstance(audio_input, tuple) and len(audio_input) == 2:
         audio_data_bytes = audio_input[1]
     else:
         raise ValueError("Expected audio_input to be a tuple with audio data bytes.")
-    # Write audio data to a temporary file in its original format
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
-        temp_audio_file.write(audio_data_bytes)
-        temp_audio_file_path = temp_audio_file.name
-    # Convert to WAV format using pydub and re-read for compatibility
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
-        audio_segment = AudioSegment.from_file(temp_audio_file_path)
         audio_segment.export(wav_file.name, format="wav")
         wav_file_path = wav_file.name
-    # Load the WAV file for transcription
-    with sr.AudioFile(wav_file_path) as source:
-        audio_data = recognizer.record(source)
     try:
-        # Transcribe the audio data
-        text = recognizer.recognize_google(audio_data)
-        return text
     except sr.UnknownValueError:
         return "Google Speech Recognition could not understand audio"
     except sr.RequestError as e:
         return f"Could not request results from Google Speech Recognition service; {e}"
-# Create the Gradio interface
 iface = gr.Interface(
     fn=transcribe_audio,
     inputs="audio",
@@ -45,5 +43,4 @@ iface = gr.Interface(
     description="Upload an audio file and get the transcribed text."
 )
-# Launch the interface
 iface.launch()

 import gradio as gr
 import speech_recognition as sr
 from pydub import AudioSegment
+from io import BytesIO
 import tempfile
 def transcribe_audio(audio_input):
     recognizer = sr.Recognizer()
     if isinstance(audio_input, tuple) and len(audio_input) == 2:
         audio_data_bytes = audio_input[1]
     else:
         raise ValueError("Expected audio_input to be a tuple with audio data bytes.")
+    # Load audio as raw data
+    try:
+        audio_segment = AudioSegment.from_file(BytesIO(audio_data_bytes), format="mp3")
+    except Exception as e:
+        return f"Error loading audio file: {e}"
+    # Save as WAV to a temporary file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
         audio_segment.export(wav_file.name, format="wav")
         wav_file_path = wav_file.name
+    # Transcribe the audio
     try:
+        with sr.AudioFile(wav_file_path) as source:
+            audio_data = recognizer.record(source)
+            text = recognizer.recognize_google(audio_data)
+            return text
     except sr.UnknownValueError:
         return "Google Speech Recognition could not understand audio"
     except sr.RequestError as e:
         return f"Could not request results from Google Speech Recognition service; {e}"
+# Gradio Interface
 iface = gr.Interface(
     fn=transcribe_audio,
     inputs="audio",
     description="Upload an audio file and get the transcribed text."
 )
 iface.launch()