Spaces:
Runtime error
Runtime error
import tempfile

import gradio as gr
import soundfile as sf
import torch
from datasets import load_dataset
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
# Load the processor, the fine-tuned Turkish acoustic model, and the vocoder.
# The processor is taken from the base checkpoint; only the TTS model weights
# come from the fine-tuned repository.
model_name = "microsoft/speecht5_tts"
processor = SpeechT5Processor.from_pretrained(model_name)
model = SpeechT5ForTextToSpeech.from_pretrained("emirhanbilgic/speecht5_finetuned_emirhan_tr")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# Load the Turkish dataset. Only the first item's transcript is needed: it is
# the default text shown in the demo's input box.
turkish_dataset = load_dataset("erenfazlioglu/turkishvoicedataset", split="train")
example_item = turkish_dataset[0]
example_text = example_item['text']

# Speaker embedding used to condition every synthesis request.
# SpeechT5ForTextToSpeech has no `get_speaker_embeddings` method (calling it
# raised AttributeError at startup), and the model cannot derive an embedding
# from raw audio by itself. It expects a precomputed x-vector, so load one
# from the x-vector dataset used in the Transformers SpeechT5 documentation
# and add a batch dimension.
# NOTE(review): this is an English (CMU ARCTIC) speaker. An x-vector computed
# from Turkish audio with a dedicated speaker encoder would likely match the
# fine-tuned voice better, but that requires an additional dependency.
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
def text_to_speech(text):
    """Synthesize speech for ``text`` and return the path to a WAV file.

    Args:
        text: The text to speak, as entered in the Gradio textbox.

    Returns:
        Filesystem path of a 16 kHz WAV file containing the generated speech.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    # Reject blank input up front so the user gets a readable message instead
    # of meaningless audio.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    inputs = processor(text=text, return_tensors="pt")
    # Clip overly long input to the maximum text length the model supports;
    # longer sequences would fail inside the encoder.
    input_ids = inputs["input_ids"][..., :model.config.max_text_positions]
    speech = model.generate_speech(input_ids, speaker_embeddings, vocoder=vocoder)

    # Write each result to its own temporary file. A single fixed
    # "output.wav" would be overwritten when several requests run at once.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, speech.numpy(), samplerate=16000)
    return output_path
# Build the demo UI: a pre-filled text box as input, an audio player as output.
text_input = gr.Textbox(
    label="Enter Turkish text to convert to speech",
    value=example_text,
)
audio_output = gr.Audio(label="Generated Speech")

iface = gr.Interface(
    fn=text_to_speech,
    inputs=text_input,
    outputs=audio_output,
    title="Turkish SpeechT5 Text-to-Speech Demo",
    description="Enter Turkish text and listen to the generated speech using the fine-tuned SpeechT5 model.",
)

# Start serving the interface.
iface.launch()