# Spaces: Running
# Gradio demo: convert English text to speech with a fine-tuned SpeechT5
# checkpoint and play the result in the browser.
import os
import re

import gradio as gr
import numpy as np
import soundfile as sf
import spaces
import torch
from datasets import load_dataset
from speechbrain.pretrained import EncoderClassifier
from transformers import (
    AutoModelForTextToSpectrogram,
    AutoProcessor,
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    SpeechT5Processor,
)

# Fine-tuned text-to-spectrogram checkpoint used by this demo.
MODEL_ID = "Aumkeshchy2003/speecht5_finetuned_Aumkesh_tr"
# Vocoder that turns the predicted spectrogram into a waveform.
VOCODER_ID = "microsoft/speecht5_hifigan"
# NOTE(review): the speaker embedding below follows the SpeechT5 documentation
# example (CMU ARCTIC x-vectors, entry 7306). Confirm it matches the voice the
# checkpoint was fine-tuned on, and that the installed `datasets` version can
# still load this dataset.
SPEAKER_DATASET_ID = "Matthijs/cmu-arctic-xvectors"
SPEAKER_INDEX = 7306
# Sample rate reported to Gradio; assumes the HiFi-GAN vocoder's 16 kHz output.
SAMPLE_RATE = 16000

# Load model directly
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForTextToSpectrogram.from_pretrained(MODEL_ID)
vocoder = SpeechT5HifiGan.from_pretrained(VOCODER_ID)

# Load the speaker embedding once at startup instead of on every request.
_xvectors = load_dataset(SPEAKER_DATASET_ID, split="validation")
speaker_embeddings = torch.tensor(
    _xvectors[SPEAKER_INDEX]["xvector"]
).unsqueeze(0)


def text_to_speech(text: str) -> tuple[int, np.ndarray]:
    """Synthesize speech for *text* and return it in Gradio's numpy format.

    Args:
        text: The text typed into the input textbox.

    Returns:
        A ``(sample_rate, waveform)`` tuple, the shape ``gr.Audio`` accepts
        when ``type="numpy"``.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Surface a readable message in the UI rather than a stack trace.
        raise gr.Error("Please enter some text to convert to speech.")

    inputs = processor(text=text, return_tensors="pt")
    with torch.no_grad():
        speech = model.generate_speech(
            inputs["input_ids"],
            speaker_embeddings,
            vocoder=vocoder,
        )
    return SAMPLE_RATE, speech.cpu().numpy()


iface = gr.Interface(
    fn=text_to_speech,
    inputs=[
        gr.Textbox(label="Enter English text to convert to speech"),
    ],
    outputs=[
        gr.Audio(label="Generated Speech", type="numpy"),
    ],
    title="English SpeechT5 Text-to-Speech Demo",
    description="Enter English text, and listen to the generated speech.",
)
iface.launch(share=True)