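# English-tts / app.py
# Committed by Aumkeshchy2003 · "Update app.py" · commit feda536 (verified) · 929 bytes
# (Header recovered from the repository file-viewer page; "raw", "history" and
# "blame" were navigation links on that page, not part of the program.)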
import gradio as gr
import torch
import soundfile as sf
import spaces
import os
import numpy as np
import re
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
from speechbrain.pretrained import EncoderClassifier
from datasets import load_dataset
# Load model directly
from transformers import AutoProcessor, AutoModelForTextToSpectrogram
# --- Models ------------------------------------------------------------------
# Fine-tuned SpeechT5 checkpoint: `processor` tokenizes the input text and
# `model` predicts a spectrogram from the token ids.
MODEL_ID = "Aumkeshchy2003/speecht5_finetuned_Aumkesh_tr"
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForTextToSpectrogram.from_pretrained(MODEL_ID)

# SpeechT5 only produces a spectrogram; the HiFi-GAN vocoder turns it into a
# waveform. This is the vocoder published alongside SpeechT5.
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# SpeechT5 conditions synthesis on a speaker x-vector.
# NOTE(review): the speaker embedding used while fine-tuning this checkpoint is
# not visible in this file. The CMU ARCTIC x-vector from the transformers
# SpeechT5 example is used as a default voice -- confirm / replace with the
# embedding that matches the fine-tuning data.
_xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embeddings = torch.tensor(_xvectors[7306]["xvector"]).unsqueeze(0)

# Output sample rate of the SpeechT5 HiFi-GAN vocoder, in Hz.
SAMPLE_RATE = 16000


def text_to_speech(text: str):
    """Synthesize speech for *text* with the fine-tuned SpeechT5 model.

    Args:
        text: English text entered by the user in the Gradio textbox.

    Returns:
        A ``(sample_rate, samples)`` tuple, where ``samples`` is a 1-D
        ``int16`` NumPy array -- the format ``gr.Audio(type="numpy")`` accepts.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace; Gradio
            shows the message to the user instead of a traceback.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some English text to convert to speech.")

    encoded = processor(text=text, return_tensors="pt")
    # The text encoder has a fixed number of positions; drop any overflow
    # tokens rather than failing on very long inputs.
    input_ids = encoded["input_ids"][..., : model.config.max_text_positions]

    # Inference only: no gradients needed.
    with torch.no_grad():
        waveform = model.generate_speech(
            input_ids, speaker_embeddings, vocoder=vocoder
        )

    # Convert the float waveform to 16-bit PCM; clip first so out-of-range
    # samples saturate instead of wrapping around.
    samples = np.clip(waveform.cpu().numpy(), -1.0, 1.0)
    return SAMPLE_RATE, (samples * 32767).astype(np.int16)


# --- UI ----------------------------------------------------------------------
iface = gr.Interface(
    fn=text_to_speech,
    inputs=[
        gr.Textbox(label="Enter English text to convert to speech"),
    ],
    outputs=[
        gr.Audio(label="Generated Speech", type="numpy"),
    ],
    title="English SpeechT5 Text-to-Speech Demo",
    description="Enter English text, and listen to the generated speech.",
)

# NOTE(review): `share=True` requests a public tunnel link when run locally;
# it is presumably unnecessary when hosted on Hugging Face Spaces -- confirm.
iface.launch(share=True)