pratikshahp committed
Commit 930e423 · verified · 1 Parent(s): 0d2601e

Update app.py

Files changed (1)
1. app.py +2 -2
app.py CHANGED
@@ -2,7 +2,6 @@
 # Author: Pratiksha Patel
 # Description: This script record the audio, transform it to text, detect the language of the file and save it to a txt file.
 # import required modules
-import os
 import torch
 import streamlit as st
 from audio_recorder_streamlit import audio_recorder
@@ -15,7 +14,7 @@ def transcribe_audio(audio_bytes):
     model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large")
     audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
     audio_tensor = torch.tensor(audio_array, dtype=torch.float64) / 32768.0
-    input_values = processor(audio_tensor, return_tensors="pt", sampling_rate=16000).input_values
+    input_values = processor(input_values=audio_tensor, return_tensors="pt", sampling_rate=16000).input_values
     logits = model(input_values).logits
     predicted_ids = torch.argmax(logits, dim=-1)
     transcription = processor.decode(predicted_ids[0])
@@ -38,3 +37,4 @@ if audio_bytes:
         st.write("Error: Failed to transcribe audio.")
 else:
     st.write("No audio recorded.")
+
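For context, the changed line hands the waveform to the processor and then argmax-decodes the model logits, which is a CTC-style pattern; openai/whisper-large is a sequence-to-sequence model that is normally fed log-mel input_features and decoded with model.generate. Below is a minimal sketch of that flow, assuming transformers' AutoProcessor, greedy generation, and 16 kHz mono 16-bit PCM input; the transcribe_audio name mirrors the diff, and none of this is the repo's actual code.

import numpy as np
import torch
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq

# Assumed model id, matching the diff; load once at module level.
processor = AutoProcessor.from_pretrained("openai/whisper-large")
model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large")

def transcribe_audio(audio_bytes: bytes) -> str:
    # 16-bit PCM bytes -> float32 waveform in [-1, 1] (assumes 16 kHz mono recording)
    audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
    waveform = audio_array.astype(np.float32) / 32768.0
    # Whisper consumes log-mel input_features rather than raw input_values
    inputs = processor(waveform, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        predicted_ids = model.generate(inputs.input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]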