Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,42 +1,40 @@
|
|
1 |
import gradio as gr
|
2 |
import speech_recognition as sr
|
3 |
from pydub import AudioSegment
|
|
|
4 |
import tempfile
|
5 |
|
6 |
def transcribe_audio(audio_input):
|
7 |
recognizer = sr.Recognizer()
|
8 |
|
9 |
-
# Ensure the input is a tuple and get the audio data bytes
|
10 |
if isinstance(audio_input, tuple) and len(audio_input) == 2:
|
11 |
audio_data_bytes = audio_input[1]
|
12 |
else:
|
13 |
raise ValueError("Expected audio_input to be a tuple with audio data bytes.")
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
#
|
16 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
|
17 |
-
temp_audio_file.write(audio_data_bytes)
|
18 |
-
temp_audio_file_path = temp_audio_file.name
|
19 |
-
|
20 |
-
# Convert to WAV format using pydub and re-read for compatibility
|
21 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
|
22 |
-
audio_segment = AudioSegment.from_file(temp_audio_file_path)
|
23 |
audio_segment.export(wav_file.name, format="wav")
|
24 |
wav_file_path = wav_file.name
|
25 |
-
|
26 |
-
#
|
27 |
-
with sr.AudioFile(wav_file_path) as source:
|
28 |
-
audio_data = recognizer.record(source)
|
29 |
-
|
30 |
try:
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
34 |
except sr.UnknownValueError:
|
35 |
return "Google Speech Recognition could not understand audio"
|
36 |
except sr.RequestError as e:
|
37 |
return f"Could not request results from Google Speech Recognition service; {e}"
|
38 |
|
39 |
-
#
|
40 |
iface = gr.Interface(
|
41 |
fn=transcribe_audio,
|
42 |
inputs="audio",
|
@@ -45,5 +43,4 @@ iface = gr.Interface(
|
|
45 |
description="Upload an audio file and get the transcribed text."
|
46 |
)
|
47 |
|
48 |
-
# Launch the interface
|
49 |
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import speech_recognition as sr
|
3 |
from pydub import AudioSegment
|
4 |
+
from io import BytesIO
|
5 |
import tempfile
|
6 |
|
7 |
def transcribe_audio(audio_input):
    """Transcribe the audio delivered by the Gradio ``"audio"`` input.

    Args:
        audio_input: A 2-tuple whose second item is the audio payload.
            With Gradio's default ``type="numpy"`` this is
            ``(sample_rate, samples)`` where ``samples`` is an array of
            shape ``(n,)`` or ``(n, channels)``. A bytes-like payload is
            still accepted and decoded as MP3, as before.

    Returns:
        The recognized text, or a human-readable error message when the
        audio cannot be loaded, understood, or sent to the service.

    Raises:
        ValueError: If ``audio_input`` is not a 2-tuple.
    """
    # Validate first so bad input fails before any recognizer work is done.
    if not (isinstance(audio_input, tuple) and len(audio_input) == 2):
        raise ValueError("Expected audio_input to be a tuple with audio data bytes.")
    sample_rate, payload = audio_input

    try:
        if hasattr(payload, "dtype") and hasattr(payload, "tobytes"):
            # Array payload: these are raw PCM samples, not an encoded file,
            # so build the segment directly instead of running a decoder.
            samples = payload
            if samples.dtype.kind == "f":
                # Float audio is assumed to be normalised to [-1, 1];
                # pydub needs integer PCM, so rescale to 16-bit.
                samples = (samples.clip(-1.0, 1.0) * 32767).astype("int16")
            channels = samples.shape[1] if samples.ndim > 1 else 1
            audio_segment = AudioSegment(
                data=samples.tobytes(),  # C-order keeps frames interleaved
                sample_width=samples.dtype.itemsize,
                frame_rate=int(sample_rate),
                channels=channels,
            )
        else:
            # Bytes-like payload: keep the original MP3 decoding path.
            audio_segment = AudioSegment.from_file(BytesIO(payload), format="mp3")
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing.
        return f"Error loading audio file: {e}"

    # Convert to WAV in memory. The previous NamedTemporaryFile(delete=False)
    # was never removed, leaking one file per request.
    wav_buffer = BytesIO()
    audio_segment.export(wav_buffer, format="wav")
    wav_buffer.seek(0)

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(wav_buffer) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
|
36 |
|
37 |
+
# Gradio Interface
|
38 |
iface = gr.Interface(
|
39 |
fn=transcribe_audio,
|
40 |
inputs="audio",
|
|
|
43 |
description="Upload an audio file and get the transcribed text."
|
44 |
)
|
45 |
|
|
|
46 |
iface.launch()
|