# Transform an audio to text script with language detection.
# Author: Pratiksha Patel
# Description: This script records audio in the browser and transcribes it to
# text with Whisper. Language detection and saving the result to a txt file
# are present below but currently commented out.

# import required modules
import io
import os
import wave

import numpy as np
import streamlit as st
import torch
from audio_recorder_streamlit import audio_recorder
from langdetect import detect

# Use a pipeline as a high-level helper
#from transformers import pipeline
#pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large")

# Load model directly
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq

MODEL_NAME = "openai/whisper-large"


@st.cache_resource
def _load_model():
    """Load the Whisper processor and model once and reuse them.

    Streamlit re-executes this script on every interaction; without caching
    the checkpoint would be re-loaded for each recording.

    Returns:
        A ``(processor, model)`` tuple for ``MODEL_NAME``.
    """
    processor = AutoProcessor.from_pretrained(MODEL_NAME)
    model = AutoModelForSpeechSeq2Seq.from_pretrained(MODEL_NAME)
    return processor, model


def _wav_bytes_to_float32(audio_bytes):
    """Decode 16-bit PCM WAV bytes into a mono float32 waveform.

    Args:
        audio_bytes: Content of a WAV file (header included).

    Returns:
        A ``(samples, sample_rate)`` tuple where ``samples`` is a 1-D
        ``np.float32`` array scaled to [-1.0, 1.0) and ``sample_rate`` is the
        rate declared in the WAV header.

    Raises:
        wave.Error: If the bytes are not a readable WAV file.
        ValueError: If the samples are not 16-bit PCM.
    """
    with wave.open(io.BytesIO(audio_bytes), "rb") as wav_file:
        n_channels = wav_file.getnchannels()
        sample_width = wav_file.getsampwidth()
        sample_rate = wav_file.getframerate()
        frames = wav_file.readframes(wav_file.getnframes())

    if sample_width != 2:
        raise ValueError(
            f"Unsupported sample width: {sample_width} bytes (expected 2)."
        )

    # Drop a trailing partial frame so the buffer splits evenly into
    # int16 samples and into channels.
    frame_size = sample_width * n_channels
    frames = frames[: len(frames) - len(frames) % frame_size]

    samples = np.frombuffer(frames, dtype="<i2").astype(np.float32) / 32768.0
    if n_channels > 1:
        # Down-mix interleaved channels to mono.
        samples = samples.reshape(-1, n_channels).mean(axis=1)
    return samples, sample_rate


def transcribe_audio(audio_bytes: bytes) -> str:
    """Transcribe recorded WAV audio to text with Whisper.

    Args:
        audio_bytes: WAV file content as returned by ``audio_recorder``.

    Returns:
        The transcription with surrounding whitespace removed, or an empty
        string when the recording contains no samples.

    Raises:
        wave.Error: If ``audio_bytes`` is not a readable WAV file.
        ValueError: If the audio is not 16-bit PCM, or if the feature
            extractor rejects the recording's sample rate.
    """
    processor, model = _load_model()

    waveform, sample_rate = _wav_bytes_to_float32(audio_bytes)
    if waveform.size == 0:
        return ""

    # Whisper's processor produces log-mel ``input_features``. Passing the
    # real sample rate lets it reject audio that is not at the rate it expects
    # instead of silently producing a garbage transcription.
    # NOTE: the feature extractor pads/truncates to a fixed window, so very
    # long recordings are only transcribed up to that window.
    input_features = processor(
        waveform, sampling_rate=sample_rate, return_tensors="pt"
    ).input_features

    # Whisper is an encoder-decoder model: text comes from autoregressive
    # generation, not from an argmax over a single forward pass.
    with torch.no_grad():
        predicted_ids = model.generate(input_features)

    transcription = processor.batch_decode(
        predicted_ids, skip_special_tokens=True
    )[0]
    return transcription.strip()


# Function to open a file
#def startfile(fn):
#    os.system('open %s' % fn)

# Function to create and open a txt file
#def create_and_open_txt(text, filename):
#    # Create and write the text to a txt file
#    with open(filename, "w") as file:
#        file.write(text)
#    startfile(filename)

# Streamlit app
st.title("Audio to Text Transcription..")

audio_bytes = audio_recorder(pause_threshold=3.0, sample_rate=16_000)

if audio_bytes:
    st.audio(audio_bytes, format="audio/wav")
    transcription = transcribe_audio(audio_bytes)
    if transcription:
        st.write("Transcription:")
        st.write(transcription)
    else:
        st.write("Error: Failed to transcribe audio.")
else:
    st.write("No audio recorded.")

# Detect the language
#language = detect(transcription)
#st.write(f"Detected language: {language}")
# Create and open a txt file with the text
#create_and_open_txt(transcription, f"output_{language}.txt")