Spaces:

shukdevdatta123
/

VocalForge-AI

Running

App Files Files Community

VocalForge-AI / app.py

shukdevdatta123

Update app.py

4760b00 verified about 2 months ago

raw

history blame

3.04 kB

	# !pip install TTS gradio numpy librosa torch

	from TTS.api import TTS
	import gradio as gr
	import numpy as np
	import librosa
	import torch
	import tempfile
	import os

	# Prefer the GPU when torch reports one; otherwise fall back to the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Build the TTS model once at import time and move it to the chosen device;
	# generate_speech() below uses this module-level instance.
	model_name = "tts_models/multilingual/multi-dataset/your_tts"
	tts = TTS(model_name=model_name).to(device)

	def process_audio(audio_path, max_duration=10):
	    """Load a clip as 16 kHz mono and cap it at ``max_duration`` seconds.

	    Returns the ``(samples, sample_rate)`` pair from ``librosa.load``; a clip
	    that is already within the limit is returned untrimmed.
	    """
	    samples, rate = librosa.load(audio_path, sr=16000, mono=True)
	    limit = max_duration * rate
	    # Guard clause: nothing to cut when the clip already fits.
	    if len(samples) <= limit:
	        return samples, rate
	    return samples[:int(limit)], rate

	def _reserve_temp_wav():
	    """Create an empty ``.wav`` temp file and return its path, handle closed."""
	    handle = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
	    # Close immediately: other writers reopen the path by name, which fails on
	    # Windows while this handle is still open.
	    handle.close()
	    return handle.name


	def _write_wav_pcm16(path, samples, sample_rate):
	    """Write mono float samples in [-1, 1] to ``path`` as 16-bit PCM WAV."""
	    import wave  # stdlib; imported locally so the file's import block is untouched

	    clipped = np.clip(np.asarray(samples, dtype=np.float32), -1.0, 1.0)
	    pcm = (clipped * 32767.0).astype("<i2")
	    with wave.open(path, "wb") as wav_file:
	        wav_file.setnchannels(1)
	        wav_file.setsampwidth(2)
	        wav_file.setframerate(int(sample_rate))
	        wav_file.writeframes(pcm.tobytes())


	def _remove_quietly(path):
	    """Delete ``path`` if it exists; cleanup is best-effort by design."""
	    try:
	        os.unlink(path)
	    except OSError:
	        # Already gone or not removable: never mask the real result/error.
	        pass


	def generate_speech(audio_file, text):
	    """Synthesize ``text`` in the voice of the reference clip ``audio_file``.

	    Args:
	        audio_file: Filesystem path of the reference voice sample.
	        text: Text to speak.

	    Returns:
	        Path of the generated ``.wav`` file, or ``None`` when an input is
	        missing/blank or the TTS call fails (the error is printed).

	    Raises:
	        Whatever ``process_audio`` raises while loading the reference clip;
	        only errors from the TTS call itself are converted to ``None``.
	    """
	    # A missing upload or blank text cannot produce speech; bail out before
	    # creating any temp files.
	    if not audio_file or not text or not text.strip():
	        print("Error: a voice sample and some text are required")
	        return None

	    ref_path = _reserve_temp_wav()
	    out_path = _reserve_temp_wav()
	    keep_output = False
	    try:
	        # Process reference audio. librosa.output.write_wav was removed in
	        # librosa 0.8, so the trimmed clip is written with the stdlib instead.
	        y, sr = process_audio(audio_file)
	        _write_wav_pcm16(ref_path, y, sr)

	        # Generate speech
	        try:
	            tts.tts_to_file(
	                text=text,
	                speaker_wav=ref_path,
	                language="en",
	                file_path=out_path,
	            )
	        except Exception as e:
	            print(f"Error: {e}")
	            return None

	        keep_output = True
	        return out_path
	    finally:
	        # The reference copy is never needed after synthesis; the output file
	        # is kept only when it is being handed back to the caller.
	        _remove_quietly(ref_path)
	        if not keep_output:
	            _remove_quietly(out_path)

	# Gradio interface
	# NOTE(review): the original indentation was lost; the nesting below (one Row
	# holding two Columns, examples and click wiring at Blocks level) is
	# reconstructed from the statement order -- confirm against the deployed app.
	with gr.Blocks(title="Voice Clone TTS") as demo:
	    # Usage instructions rendered at the top of the page.
	    gr.Markdown("""
	# 🎤 Voice Clone Text-to-Speech
	1. Upload a short English voice sample (5-10 seconds)
	2. Enter text you want to speak
	3. Generate audio in your voice!
	""")

	    with gr.Row():
	        # Input side: reference voice, text to speak, and the trigger button.
	        with gr.Column():
	            audio_input = gr.Audio(
	                label="Upload Voice Sample",
	                type="filepath",
	                sources=["upload", "microphone"],
	                interactive=True,
	            )
	            text_input = gr.Textbox(
	                lines=4,
	                label="Text to Speak",
	                placeholder="Enter English text here...",
	            )
	            btn = gr.Button("Generate Speech", variant="primary")

	        # Output side: the synthesized audio plus a hidden info textbox.
	        with gr.Column():
	            audio_output = gr.Audio(interactive=False, label="Generated Speech")
	            error_output = gr.Textbox(visible=False, label="Processing Info")

	    # Example inputs: (reference clip, text) pairs offered as presets.
	    example_rows = [
	        ["examples/sample_voice.wav", "Hello! Welcome to the future of voice cloning technology"],
	        ["examples/sample_voice2.wav", "This text is spoken in a completely cloned voice"],
	    ]
	    gr.Examples(
	        examples=example_rows,
	        fn=generate_speech,
	        inputs=[audio_input, text_input],
	        outputs=audio_output,
	        cache_examples=True,
	    )

	    # The button runs the same function on whatever the user supplied.
	    btn.click(
	        generate_speech,
	        inputs=[audio_input, text_input],
	        outputs=audio_output,
	    )

	if __name__ == "__main__":
	    # Start the app only when run as a script: fixed port 7860, share=True.
	    launch_options = {"server_port": 7860, "share": True}
	    demo.launch(**launch_options)