pratikshahp's picture
Update app.py
1d97cff verified
raw
history blame
2.19 kB
# Audio-to-text transcription script with language detection.
# Author: Pratiksha Patel
# Description: This script records audio, transcribes it to text, detects the language of the transcription, and saves it to a txt file.
# import required modules
import io
import os
import subprocess
import sys
import wave

import numpy as np
import streamlit as st
from audio_recorder_streamlit import audio_recorder
from langdetect import LangDetectException
from langdetect import detect
# Use a pipeline as a high-level helper
from transformers import pipeline
@st.cache_resource
def _load_asr_pipeline():
    """Build the Whisper speech-recognition pipeline.

    Streamlit re-executes the whole script on every user interaction, so an
    unguarded module-level ``pipeline(...)`` call would rebuild the model on
    each rerun. ``st.cache_resource`` keeps one shared instance instead.
    """
    return pipeline("automatic-speech-recognition", model="openai/whisper-large")


# Module-level handle kept under its original name for the rest of the script.
pipe = _load_asr_pipeline()
# Load model directly
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
def _decode_wav(audio_bytes):
    """Decode 16-bit PCM WAV bytes into mono float32 samples.

    Args:
        audio_bytes: Complete WAV file content (header plus sample data).

    Returns:
        A ``(samples, sample_rate)`` tuple where ``samples`` is a 1-D
        ``numpy.float32`` array scaled by 1/32768 and ``sample_rate`` is the
        frame rate read from the WAV header.

    Raises:
        ValueError: If the bytes are not a readable WAV container or the
            samples are not 16-bit.
    """
    try:
        with wave.open(io.BytesIO(audio_bytes), "rb") as wav_file:
            channels = wav_file.getnchannels()
            sample_width = wav_file.getsampwidth()
            sample_rate = wav_file.getframerate()
            frames = wav_file.readframes(wav_file.getnframes())
    except (wave.Error, EOFError) as err:
        raise ValueError("audio_bytes is not a valid WAV recording") from err

    if sample_width != 2:
        raise ValueError(
            f"unsupported WAV sample width: {sample_width} bytes (expected 2)"
        )

    # Drop a trailing partial frame so the buffer divides evenly into samples.
    frame_size = sample_width * channels
    frames = frames[: len(frames) - len(frames) % frame_size]

    # Read the payload only (not the header) as little-endian int16.
    samples = np.frombuffer(frames, dtype="<i2").astype(np.float32) / 32768.0
    if channels > 1:
        # The speech-recognition pipeline expects single-channel audio.
        samples = samples.reshape(-1, channels).mean(axis=1)
    return samples, sample_rate


def transcribe_audio(audio_bytes):
    """Transcribe a WAV recording to text.

    The recording is decoded with the standard-library ``wave`` module and
    passed to the module-level ``pipe`` speech-recognition pipeline, so no
    additional model is loaded per call.

    Args:
        audio_bytes: WAV file content, as passed in by the script from
            ``audio_recorder``.

    Returns:
        The transcribed text with surrounding whitespace removed, or an empty
        string when there is no audio to transcribe.

    Raises:
        ValueError: If ``audio_bytes`` is not a 16-bit PCM WAV recording.
    """
    if not audio_bytes:
        return ""

    samples, sample_rate = _decode_wav(audio_bytes)
    if samples.size == 0:
        return ""

    # Raw-audio dict form of the pipeline input: samples plus their rate.
    result = pipe({"raw": samples, "sampling_rate": sample_rate})
    return result["text"].strip()
# Function to open a file
def startfile(fn):
    """Open *fn* with the operating system's default application.

    The opener is invoked with an argument list rather than a shell string,
    so file names containing spaces or shell metacharacters are passed
    through verbatim and cannot inject commands.

    Args:
        fn: Path of the file to open.

    Opening is best effort: if no opener is available, the failure is
    ignored, just as the original ignored the opener's exit status.
    """
    try:
        if sys.platform.startswith("win"):
            os.startfile(fn)
        else:
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            subprocess.run([opener, fn], check=False)
    except OSError:
        # No opener installed, or the OS refused to launch it. Viewing the
        # file is a convenience only, so the caller should not crash.
        pass
# Function to create and open a txt file
def create_and_open_txt(text, filename):
    """Write *text* to *filename* and open the resulting file.

    Args:
        text: The text to save.
        filename: Path of the txt file to create; an existing file is
            overwritten.
    """
    # Explicit UTF-8 so transcriptions in any language can be written
    # regardless of the platform's default locale encoding.
    with open(filename, "w", encoding="utf-8") as file:
        file.write(text)
    # Open only after the with-block has closed the file, so the viewer sees
    # the complete content.
    startfile(filename)
# Streamlit app
st.title("Audio to Text Transcription..")
audio_bytes = audio_recorder(pause_threshold=3.0, sample_rate=16_000)
if audio_bytes:
    st.audio(audio_bytes, format="audio/wav")
    transcription = transcribe_audio(audio_bytes)
    if transcription:
        st.write("Transcription:")
        st.write(transcription)
        # Detect the language. This runs only when a transcription exists;
        # langdetect raises when the text has no usable features.
        try:
            language = detect(transcription)
        except LangDetectException:
            language = "unknown"
            st.write("Could not detect the language of the transcription.")
        else:
            st.write(f"Detected language: {language}")
        # Create and open a txt file with the text
        create_and_open_txt(transcription, f"output_{language}.txt")
    else:
        st.write("Error: Failed to transcribe audio.")
else:
    st.write("No audio recorded.")