File size: 929 Bytes
0452208
 
 
 
 
 
 
 
 
 
 
feda536
 
0452208
feda536
 
0452208
 
 
 
7d2e880
0452208
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import gradio as gr
import torch
import soundfile as sf
import spaces
import os
import numpy as np
import re
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
from speechbrain.pretrained import EncoderClassifier
from datasets import load_dataset

# Load the text-to-spectrogram checkpoint and its matching processor.
from transformers import AutoProcessor, AutoModelForTextToSpectrogram

# Both objects are loaded from the same checkpoint, so the identifier lives in
# one place to keep them from drifting apart.
_CHECKPOINT = "Aumkeshchy2003/speecht5_finetuned_Aumkesh_tr"

processor = AutoProcessor.from_pretrained(_CHECKPOINT)
model = AutoModelForTextToSpectrogram.from_pretrained(_CHECKPOINT)

# Vocoder that converts the spectrogram predicted by `model` into a waveform.
_vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# SpeechT5 conditions generation on a speaker x-vector.
# NOTE(review): this is the stock embedding used in the SpeechT5 documentation;
# if the checkpoint was fine-tuned on a specific speaker, swap in that speaker's
# embedding instead — confirm against the training setup.
_xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
_speaker_embeddings = torch.tensor(_xvectors[7306]["xvector"]).unsqueeze(0)

# Output sampling rate of the SpeechT5 HiFi-GAN vocoder, in Hz.
_SAMPLE_RATE = 16000


def text_to_speech(text):
    """Synthesize speech for ``text`` with the loaded SpeechT5 model.

    Args:
        text: The text typed into the Gradio textbox.

    Returns:
        A ``(sample_rate, samples)`` tuple, the format ``gr.Audio`` expects
        for ``type="numpy"``; ``samples`` is a 1-D ``int16`` NumPy array.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    input_ids = processor(text=text, return_tensors="pt")["input_ids"]

    # Overlong inputs would exceed the model's text position embeddings, so
    # trim to the configured limit when the config exposes one.
    max_positions = getattr(model.config, "max_text_positions", None)
    if max_positions is not None:
        input_ids = input_ids[..., :max_positions]

    with torch.no_grad():
        waveform = model.generate_speech(
            input_ids, _speaker_embeddings, vocoder=_vocoder
        )

    # Convert the float waveform to 16-bit PCM explicitly so playback volume
    # does not depend on Gradio's own float normalization.
    samples = np.clip(waveform.cpu().numpy(), -1.0, 1.0)
    return _SAMPLE_RATE, (samples * 32767).astype(np.int16)


iface = gr.Interface(
    fn=text_to_speech,
    inputs=[
        gr.Textbox(label="Enter English text to convert to speech")
    ],
    outputs=[
        gr.Audio(label="Generated Speech", type="numpy")
    ],
    title="English SpeechT5 Text-to-Speech Demo",
    description="Enter English text, and listen to the generated speech."
)
iface.launch(share=True)