Spaces:

arshadrana
/

voice-to-text

Running

arshadrana committed on Nov 8, 2024

Commit

5f94e97

verified ·

1 Parent(s): 6fdb7ee

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import gradio as gr
 import speech_recognition as sr
-from io import BytesIO
 from pydub import AudioSegment
 def transcribe_audio(audio_input):
     recognizer = sr.Recognizer()
@@ -12,17 +12,19 @@ def transcribe_audio(audio_input):
     else:
         raise ValueError("Expected audio_input to be a tuple with audio data bytes.")
-    # Use BytesIO to create a file-like object from the audio bytes
-    audio_file = BytesIO(audio_data_bytes)
-    # Convert audio to WAV format using pydub
-    audio_segment = AudioSegment.from_file(audio_file)
-    wav_io = BytesIO()
-    audio_segment.export(wav_io, format="wav")
-    wav_io.seek(0)  # Move to the beginning of the file-like object
-    # Load the audio file from the file-like object in WAV format
-    with sr.AudioFile(wav_io) as source:
         audio_data = recognizer.record(source)
     try:

 import gradio as gr
 import speech_recognition as sr
 from pydub import AudioSegment
+import tempfile
 def transcribe_audio(audio_input):
     recognizer = sr.Recognizer()
     else:
         raise ValueError("Expected audio_input to be a tuple with audio data bytes.")
+    # Write audio data to a temporary file in its original format
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
+        temp_audio_file.write(audio_data_bytes)
+        temp_audio_file_path = temp_audio_file.name
+    # Convert to WAV format using pydub and re-read for compatibility
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
+        audio_segment = AudioSegment.from_file(temp_audio_file_path)
+        audio_segment.export(wav_file.name, format="wav")
+        wav_file_path = wav_file.name
+    # Load the WAV file for transcription
+    with sr.AudioFile(wav_file_path) as source:
         audio_data = recognizer.record(source)
     try: