Speech_to_Image / app.py
GojoSaturo's picture
Update app.py
c9878f8
raw
history blame
730 Bytes
# Dependency (install before running): pip install googletrans
import gradio as gr
from googletrans import Translator
import torch
# Initialize Translator
from transformers import pipeline
# googletrans client; nothing in the code shown here calls it yet.
translator = Translator()

# Whisper checkpoint used for speech recognition.
MODEL_NAME = "openai/whisper-base"

# Run on CUDA device 0 when a GPU is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Speech-recognition pipeline shared by the request handler; audio is
# processed in 30-second chunks.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
    chunk_length_s=30,
)
def transcribe_audio(audio):
    """Transcribe recorded speech to text with the module-level ``pipe``.

    Args:
        audio: The recording handed over by the Gradio audio input; it is
            passed unchanged to ``pipe``. ``None`` when nothing was recorded.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        no audio was provided.
    """
    # Submitting the form without a recording yields None; the pipeline
    # cannot process that, so return an empty transcription instead of
    # surfacing an exception in the UI.
    if audio is None:
        return ""
    return pipe(audio)["text"]
# Gradio UI: record from the microphone and display the transcription.
# type='filepath' makes Gradio hand the callback a path to the recorded
# file, which the speech-recognition pipeline can read directly. The default
# type='numpy' would deliver a (sample_rate, ndarray) tuple, which the
# pipeline does not accept as-is.
audio_record = gr.inputs.Audio(
    source='microphone',
    type='filepath',
    label='Record Audio',
)
output_text = gr.outputs.Textbox(label='Transcription')
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_record,
    outputs=output_text,
)
# Start the web server for the app.
interface.launch()