# Spaces: Running  (appears to be page-header residue from the hosting site, not part of the app)
import io
import wave

import numpy as np
import streamlit as st
import torch
from audio_recorder_streamlit import audio_recorder
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
def _wav_bytes_to_waveform(audio_bytes):
    """Decode in-memory PCM WAV data into a mono float32 waveform.

    Args:
        audio_bytes: Complete WAV file contents (RIFF header plus PCM frames).

    Returns:
        A ``(waveform, sample_rate)`` tuple: ``waveform`` is a 1-D float32
        NumPy array scaled to [-1.0, 1.0) and ``sample_rate`` is the frame
        rate read from the WAV header.

    Raises:
        wave.Error, EOFError: If the data is not a readable PCM WAV file.
        ValueError: If the sample width is not 8, 16 or 32 bits.
    """
    # sample width in bytes -> (numpy dtype, zero offset, full-scale divisor)
    pcm_layouts = {
        1: ("u1", 128.0, 128.0),  # 8-bit WAV is unsigned, centred on 128
        2: ("<i2", 0.0, 32768.0),
        4: ("<i4", 0.0, 2147483648.0),
    }

    with wave.open(io.BytesIO(audio_bytes), "rb") as wav_file:
        channel_count = wav_file.getnchannels()
        sample_width = wav_file.getsampwidth()
        sample_rate = wav_file.getframerate()
        frames = wav_file.readframes(wav_file.getnframes())

    if sample_width not in pcm_layouts:
        raise ValueError(f"unsupported WAV sample width: {sample_width} bytes")
    dtype, offset, full_scale = pcm_layouts[sample_width]

    # Drop a trailing partial frame (truncated recording) so the buffer
    # length is an exact multiple of the frame size.
    frame_size = sample_width * channel_count
    frames = frames[: len(frames) - len(frames) % frame_size]

    samples = np.frombuffer(frames, dtype=dtype).astype(np.float32)
    samples = (samples - offset) / full_scale
    if channel_count > 1:
        # Frames are channel-interleaved; average the channels down to mono.
        samples = samples.reshape(-1, channel_count).mean(axis=1)
    return samples.astype(np.float32, copy=False), sample_rate


def transcribe_audio(audio_bytes):
    """Convert recorded WAV audio to text with the Speech2Text model.

    The raw WAV bytes are decoded to a waveform, turned into model features
    by the processor, and decoded from the generated token ids.

    NOTE: the checkpoint used here, ``facebook/s2t-small-mustc-en-fr-st``,
    is published as an English-to-French speech-translation model, so the
    returned text is expected to be French rather than a same-language
    transcript.

    NOTE: the model and processor are loaded on every call.

    Args:
        audio_bytes: WAV file contents as ``bytes`` (what ``audio_recorder``
            returns), or a falsy value when nothing was recorded.

    Returns:
        A list with one decoded string per input utterance, or an empty list
        when the audio is missing, empty, or not a decodable PCM WAV file.
        The empty list lets callers use a simple truthiness check.

    Raises:
        ValueError: Propagated from the processor if the recording's sample
            rate does not match the rate the model was trained with.
    """
    if not audio_bytes:
        return []

    # Validate and decode the audio before paying for the model load.
    try:
        waveform, sample_rate = _wav_bytes_to_waveform(audio_bytes)
    except (wave.Error, EOFError, ValueError):
        return []
    if waveform.size == 0:
        return []

    model_name = "facebook/s2t-small-mustc-en-fr-st"
    processor = Speech2TextProcessor.from_pretrained(model_name)
    model = Speech2TextForConditionalGeneration.from_pretrained(model_name)

    # The processor turns the raw waveform into the feature tensor and
    # attention mask that the model consumes.
    inputs = processor(waveform, sampling_rate=sample_rate, return_tensors="pt")
    generated_ids = model.generate(
        inputs["input_features"],
        attention_mask=inputs["attention_mask"],
    )
    return processor.batch_decode(generated_ids, skip_special_tokens=True)
# --- Streamlit page: record audio, play it back, show the model output ---
st.title("Audio to Text Transcription..")

# Recorder widget: yields the captured audio once a recording has finished,
# and a falsy value while nothing has been recorded.
audio_bytes = audio_recorder(pause_threshold=3.0, sample_rate=16_000)

if not audio_bytes:
    st.write("No audio recorded.")
else:
    # Play the recording back before showing the model output.
    st.audio(audio_bytes, format="audio/wav")
    transcription = transcribe_audio(audio_bytes)
    if not transcription:
        st.write("Error: Failed to transcribe audio.")
    else:
        st.write("Transcription:")
        st.write(transcription)