pratikshahp committed
Commit 930e423 · verified · 1 Parent(s): 0d2601e

Update app.py

Files changed (1)
1. app.py +2 -2
app.py CHANGED
@@ -2,7 +2,6 @@
 # Author: Pratiksha Patel
 # Description: This script record the audio, transform it to text, detect the language of the file and save it to a txt file.
 # import required modules
-import os
 import torch
 import streamlit as st
 from audio_recorder_streamlit import audio_recorder
@@ -15,7 +14,7 @@ def transcribe_audio(audio_bytes):
     model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large")
     audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
     audio_tensor = torch.tensor(audio_array, dtype=torch.float64) / 32768.0
-    input_values = processor(audio_tensor, return_tensors="pt", sampling_rate=16000).input_values
+    input_values = processor(input_values=audio_tensor, return_tensors="pt", sampling_rate=16000).input_values
     logits = model(input_values).logits
     predicted_ids = torch.argmax(logits, dim=-1)
     transcription = processor.decode(predicted_ids[0])
@@ -38,3 +37,4 @@ if audio_bytes:
         st.write("Error: Failed to transcribe audio.")
 else:
     st.write("No audio recorded.")
+
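For context, the changed line hands the waveform to the processor and then argmax-decodes the model logits, which is a CTC-style pattern; openai/whisper-large is a sequence-to-sequence model that is normally fed log-mel input_features and decoded with model.generate. Below is a minimal sketch of that flow, assuming transformers' AutoProcessor, greedy generation, and 16 kHz mono 16-bit PCM input; the transcribe_audio name mirrors the diff, and none of this is the repo's actual code.

import numpy as np
import torch
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq

# Assumed model id, matching the diff; load once at module level.
processor = AutoProcessor.from_pretrained("openai/whisper-large")
model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large")

def transcribe_audio(audio_bytes: bytes) -> str:
    # 16-bit PCM bytes -> float32 waveform in [-1, 1] (assumes 16 kHz mono recording)
    audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
    waveform = audio_array.astype(np.float32) / 32768.0
    # Whisper consumes log-mel input_features rather than raw input_values
    inputs = processor(waveform, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        predicted_ids = model.generate(inputs.input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]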