Spaces:

ibrahim313
/

TextToAudio

Build error

App Files Files Community

TextToAudio / app.py

ibrahim313

Update app.py

562fd62 verified 12 months ago

raw

history blame

3.89 kB

	import streamlit as st
	import torch
	from parler_tts import ParlerTTSForConditionalGeneration
	from transformers import AutoTokenizer
	import soundfile as sf

	# Set up the device: use the first CUDA GPU when one is available, else CPU.
	device = "cuda:0" if torch.cuda.is_available() else "cpu"

	# Checkpoint used for both the model weights and the tokenizer.
	MODEL_NAME = "parler-tts/parler-tts-large-v1"


	@st.cache_resource
	def load_tts(model_name, target_device):
	    """Load the Parler-TTS model and its tokenizer once per server process.

	    Streamlit re-executes this script from the top on every widget
	    interaction. Without caching, the checkpoint would be re-loaded and
	    moved to the device on each rerun; ``st.cache_resource`` keeps a single
	    shared instance alive across reruns and sessions.

	    Args:
	        model_name: Hub identifier of the checkpoint to load.
	        target_device: Torch device string the model is moved to.

	    Returns:
	        A ``(model, tokenizer)`` tuple.
	    """
	    tts_model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(target_device)
	    tts_tokenizer = AutoTokenizer.from_pretrained(model_name)
	    return tts_model, tts_tokenizer


	# Load the model and tokenizer (served from the cache after the first run).
	model, tokenizer = load_tts(MODEL_NAME, device)

	# Neon-themed styling: dark background (#0f0f0f) with bright green (#0fff0f)
	# text for the page body and text inputs, and inverted colors for buttons.
	NEON_CSS = """
	<style>
	body {
	background-color: #0f0f0f;
	color: #0fff0f;
	}
	.stTextInput, .stTextArea {
	background-color: #333333;
	color: #0fff0f;
	}
	.stButton > button {
	background-color: #0fff0f;
	color: #0f0f0f;
	}
	</style>
	"""
	# The markup is passed with unsafe_allow_html=True so that the <style> tag
	# is emitted as HTML rather than shown as text.
	st.markdown(NEON_CSS, unsafe_allow_html=True)

	# Page heading.
	st.title("🎤 Neon TTS Converter")

	# Voice presets: maps the name shown in the sidebar to the natural-language
	# speaker description that is later tokenized and passed to the model.
	voices = {
	    "Smooth Female": "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch.",
	    "Monotone Male": "Jon's voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise.",
	    "Energetic Youth": "An energetic young speaker with a lively tone and rapid speech, creating a sense of excitement.",
	    "Calm Elderly": "An elderly speaker with a calm and slow-paced voice, bringing wisdom and serenity to the speech.",
	    "Robotic": "A robotic, artificial voice with a consistent pitch and no variation in tone.",
	    "Narrator": "A deep and clear voice, with a strong presence and a slightly slower pace, suitable for narrations.",
	    "Whisper": "A soft, whispered voice, with very low volume and an intimate tone.",
	    "Formal": "A formal, authoritative voice with clear articulation and a steady pace.",
	    "Happy": "A cheerful, upbeat voice with a positive tone and lively intonation.",
	    "Mysterious": "A mysterious and low-pitched voice, with slow delivery and a sense of intrigue.",
	    "Bass-Heavy Male": "A deep, resonant male voice with a strong bass, ideal for dramatic and powerful delivery.",
	    "Actor Voice 1": "An actor's voice with a dynamic range, capable of various emotional tones and expressions.",
	    "Actor Voice 2": "A distinct and engaging actor's voice, providing a unique flair and character to the speech.",
	}

	# Everything created inside this context is rendered in the sidebar.
	with st.sidebar:
	    # Voice selection, followed by the description of the chosen preset.
	    st.header("Select Voice")
	    voice_choice = st.selectbox("Choose a Voice", list(voices))
	    st.markdown(f"Description: {voices[voice_choice]}")

	    # Free-form text that will be spoken.
	    st.header("Custom Prompt")
	    prompt = st.text_area("Enter your custom prompt", value="Hey, how are you doing today?")

	# Generate the TTS output when the sidebar button is pressed.
	if st.sidebar.button("Generate Speech"):
	    if not prompt.strip():
	        # An empty or whitespace-only prompt gives the model nothing to
	        # speak, so ask for input instead of running generation.
	        st.warning("Please enter some text to convert to speech.")
	    else:
	        # Error handling: any failure while tokenizing, generating, or
	        # writing the audio is reported in the UI rather than as a traceback.
	        try:
	            description = voices[voice_choice]

	            # Tokenize each text once and reuse the result for both the
	            # token ids and the attention mask.
	            description_inputs = tokenizer(description, return_tensors="pt")
	            prompt_inputs = tokenizer(prompt, return_tensors="pt")

	            # Generate speech: the voice description conditions the
	            # speaker style, the prompt is the text to be spoken.
	            generation = model.generate(
	                input_ids=description_inputs.input_ids.to(device),
	                prompt_input_ids=prompt_inputs.input_ids.to(device),
	                attention_mask=description_inputs.attention_mask.to(device),
	                prompt_attention_mask=prompt_inputs.attention_mask.to(device),
	            )
	            # Move the waveform to host memory and drop singleton dimensions.
	            audio_arr = generation.cpu().numpy().squeeze()

	            # Save the audio file at the model's configured sampling rate.
	            output_file = "parler_tts_out.wav"
	            sf.write(output_file, audio_arr, model.config.sampling_rate)

	            # Display the audio player
	            st.audio(output_file)
	            st.success("Speech generation complete!")
	        except Exception as e:
	            st.error(f"An error occurred: {e}")