Spaces:

wifix199
/

Text_to_speech_LuminaIQ

Running

App Files Files Community

Text_to_speech_LuminaIQ / app.py

wifix199

Upload 2 files

e67e9cb verified 7 months ago

raw

history blame

1.77 kB

	import gradio as gr
	import numpy as np
	import torch
	from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan

	# Fetch the pretrained SpeechT5 processor and text-to-speech model from one
	# shared checkpoint; the vocoder is loaded from its own checkpoint.
	checkpoint = "microsoft/speecht5_tts"
	processor, model = (
	    SpeechT5Processor.from_pretrained(checkpoint),
	    SpeechT5ForTextToSpeech.from_pretrained(checkpoint),
	)
	vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

	# Maps each selectable voice to the .npy file that stores its speaker embedding.
	speaker_embeddings = dict(
	    male="speaker/cmu_us_bdl_arctic-wav-arctic_a0009.npy",
	    female="speaker/cmu_us_slt_arctic-wav-arctic_a0508.npy",
	)

	# Function to generate speech
	def text_to_speech(text, gender):
	    """Synthesize ``text`` with the SpeechT5 model in the selected voice.

	    Args:
	        text: Text to speak. ``None``, empty, or whitespace-only text yields
	            an empty clip without invoking the model.
	        gender: Key into ``speaker_embeddings`` ("male" or "female") that
	            selects which speaker-embedding file is loaded.

	    Returns:
	        A ``(sample_rate, samples)`` tuple where ``sample_rate`` is 16000
	        and ``samples`` is a NumPy ``int16`` array.

	    Raises:
	        gr.Error: If ``gender`` is not a key of ``speaker_embeddings``
	            (for example ``None`` when no radio option was selected).
	    """
	    sample_rate = 16000

	    # Nothing to synthesize: return an empty clip instead of running the model.
	    if not text or not text.strip():
	        return (sample_rate, np.zeros(0, dtype=np.int16))

	    # Validate the voice before any expensive work so the user gets a clear
	    # message rather than a bare KeyError after tokenization.
	    if gender not in speaker_embeddings:
	        raise gr.Error("Please select a voice gender (male or female).")

	    inputs = processor(text=text, return_tensors="pt")

	    # Truncate input if too long
	    input_ids = inputs["input_ids"][..., :model.config.max_text_positions]

	    # Load the speaker embedding for the chosen voice and add a leading
	    # batch dimension.
	    speaker_embedding = torch.tensor(np.load(speaker_embeddings[gender])).unsqueeze(0)

	    # Generate speech
	    speech = model.generate_speech(input_ids, speaker_embedding, vocoder=vocoder)

	    # Clip before scaling so out-of-range samples saturate instead of
	    # wrapping around when cast to int16.
	    samples = np.clip(speech.numpy(), -1.0, 1.0) * 32767
	    return (sample_rate, samples.astype(np.int16))

	# Create the Gradio interface
	iface = gr.Interface(
	    fn=text_to_speech,
	    inputs=[
	        gr.Textbox(label="Enter Text"),
	        # Preselect a voice so the callback never receives an empty
	        # selection when the user submits without touching the radio.
	        gr.Radio(["male", "female"], value="male", label="Select Voice Gender"),
	    ],
	    outputs=gr.Audio(label="Generated Speech"),
	    title="Text-to-Speech Bot",
	    description="Enter text and select a voice gender to generate speech.",
	)

	# Launch the interface
	iface.launch()