arshadrana committed on
Commit
a562e5f
·
verified ·
1 Parent(s): 5f94e97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -18
app.py CHANGED
@@ -1,42 +1,40 @@
1
  import gradio as gr
2
  import speech_recognition as sr
3
  from pydub import AudioSegment
 
4
  import tempfile
5
 
6
  def transcribe_audio(audio_input):
7
  recognizer = sr.Recognizer()
8
 
9
- # Ensure the input is a tuple and get the audio data bytes
10
  if isinstance(audio_input, tuple) and len(audio_input) == 2:
11
  audio_data_bytes = audio_input[1]
12
  else:
13
  raise ValueError("Expected audio_input to be a tuple with audio data bytes.")
 
 
 
 
 
 
14
 
15
- # Write audio data to a temporary file in its original format
16
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
17
- temp_audio_file.write(audio_data_bytes)
18
- temp_audio_file_path = temp_audio_file.name
19
-
20
- # Convert to WAV format using pydub and re-read for compatibility
21
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
22
- audio_segment = AudioSegment.from_file(temp_audio_file_path)
23
  audio_segment.export(wav_file.name, format="wav")
24
  wav_file_path = wav_file.name
25
-
26
- # Load the WAV file for transcription
27
- with sr.AudioFile(wav_file_path) as source:
28
- audio_data = recognizer.record(source)
29
-
30
  try:
31
- # Transcribe the audio data
32
- text = recognizer.recognize_google(audio_data)
33
- return text
 
34
  except sr.UnknownValueError:
35
  return "Google Speech Recognition could not understand audio"
36
  except sr.RequestError as e:
37
  return f"Could not request results from Google Speech Recognition service; {e}"
38
 
39
- # Create the Gradio interface
40
  iface = gr.Interface(
41
  fn=transcribe_audio,
42
  inputs="audio",
@@ -45,5 +43,4 @@ iface = gr.Interface(
45
  description="Upload an audio file and get the transcribed text."
46
  )
47
 
48
- # Launch the interface
49
  iface.launch()
 
1
  import gradio as gr
2
  import speech_recognition as sr
3
  from pydub import AudioSegment
4
+ from io import BytesIO
5
  import tempfile
6
 
def _load_audio_segment(sample_rate, payload):
    """Build a pydub ``AudioSegment`` from encoded bytes or raw PCM samples.

    Args:
        sample_rate: Sampling rate in Hz; only used for raw sample arrays.
        payload: Either encoded audio as a bytes-like object (decoded as MP3,
            as before) or an array-like object exposing ``dtype``, ``ndim``,
            ``shape`` and ``tobytes`` (e.g. the numpy array that Gradio's
            default audio input passes).

    Returns:
        The decoded ``AudioSegment``.

    Raises:
        TypeError: If ``payload`` is neither bytes-like nor array-like.
    """
    if isinstance(payload, (bytes, bytearray, memoryview)):
        # Encoded audio: keep the original MP3 decoding path.
        return AudioSegment.from_file(BytesIO(payload), format="mp3")

    if not hasattr(payload, "dtype"):
        raise TypeError(
            f"Unsupported audio payload type: {type(payload).__name__}"
        )

    samples = payload
    if samples.dtype.kind == "f":
        # Assumes float samples are normalised to [-1.0, 1.0]; convert them
        # to 16-bit PCM, which is what a WAV container expects here.
        samples = (samples.clip(-1.0, 1.0) * 32767).astype("int16")

    # Multi-channel arrays are laid out as (frames, channels); C-order bytes
    # are therefore already interleaved the way raw PCM needs them.
    channels = 1 if samples.ndim == 1 else samples.shape[1]
    return AudioSegment(
        data=samples.tobytes(),
        sample_width=samples.dtype.itemsize,
        frame_rate=int(sample_rate),
        channels=channels,
    )


def transcribe_audio(audio_input):
    """Transcribe an audio clip with Google Speech Recognition.

    Args:
        audio_input: A 2-tuple ``(sample_rate, data)``. ``data`` may be a raw
            sample array (what Gradio's ``"audio"`` input provides by default)
            or encoded MP3 bytes.

    Returns:
        The recognised text, or a human-readable error message when the audio
        cannot be loaded, understood, or sent to the recognition service.

    Raises:
        ValueError: If ``audio_input`` is not a 2-tuple.
    """
    # Validate before doing any work so bad input fails fast.
    if not (isinstance(audio_input, tuple) and len(audio_input) == 2):
        raise ValueError("Expected audio_input to be a tuple with audio data bytes.")
    sample_rate, payload = audio_input

    # Load the audio, whatever form it arrived in.
    try:
        audio_segment = _load_audio_segment(sample_rate, payload)
    except Exception as e:
        return f"Error loading audio file: {e}"

    # Convert to WAV in memory: no temporary file is left behind on disk and
    # no path is re-opened while another handle still holds it.
    wav_buffer = BytesIO()
    audio_segment.export(wav_buffer, format="wav")
    wav_buffer.seek(0)

    # Transcribe the audio.
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(wav_buffer) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
36
 
37
+ # Gradio Interface
38
  iface = gr.Interface(
39
  fn=transcribe_audio,
40
  inputs="audio",
 
43
  description="Upload an audio file and get the transcribed text."
44
  )
45
 
 
46
  iface.launch()