import io

import torch
import torchaudio
from torchaudio.transforms import Resample
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
from audio_recorder_streamlit import audio_recorder
import streamlit as st
def preprocess_audio(audio_bytes, target_sample_rate=16000):
    # torchaudio.load expects a path or file-like object, so wrap the raw WAV bytes
    waveform, orig_sample_rate = torchaudio.load(io.BytesIO(audio_bytes), normalize=True)
    # Convert to mono if necessary
    if waveform.size(0) > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    # Resample if the recording rate differs from the rate the model expects
    if orig_sample_rate != target_sample_rate:
        resampler = Resample(orig_freq=orig_sample_rate, new_freq=target_sample_rate)
        waveform = resampler(waveform)
    return waveform
def transcribe_audio(audio_bytes):
    # Note: this checkpoint translates English speech into French text
    model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-mustc-en-fr-st")
    processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-mustc-en-fr-st")
    # Preprocess audio and extract input features with the processor
    waveform = preprocess_audio(audio_bytes)
    inputs = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt")
    # Generate output token ids and decode them to text
    generated_ids = model.generate(inputs["input_features"], attention_mask=inputs["attention_mask"])
    translation = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return translation
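
# Optional sketch (not in the original file): loading the model and processor inside
# transcribe_audio re-initializes them on every Streamlit rerun. A common pattern is
# to cache them with st.cache_resource and call this helper from transcribe_audio instead.
@st.cache_resource
def load_model_and_processor():
    model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-mustc-en-fr-st")
    processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-mustc-en-fr-st")
    return model, processor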
st.title("Audio to Text Transcription")
audio_bytes = audio_recorder(pause_threshold=3.0, sample_rate=16000)
if audio_bytes:
    st.audio(audio_bytes, format="audio/wav")
    transcription = transcribe_audio(audio_bytes)
    if transcription:
        st.write("Transcription:")
        st.write(transcription)
    else:
        st.write("Error: Failed to transcribe audio.")
else:
    st.write("No audio recorded.")