Spaces:
Build error
Build error
import io

import soundfile as sf
import streamlit as st
import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
# Set up the device: use the first CUDA GPU when one is available, else CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Checkpoint shared by the model and its tokenizer.
_MODEL_ID = "parler-tts/parler-tts-large-v1"


@st.cache_resource
def _load_tts(model_id, target_device):
    """Load the Parler-TTS model and tokenizer once per server process.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the checkpoint would be re-instantiated on
    each rerun; ``st.cache_resource`` keeps a single shared instance instead.

    Args:
        model_id: Hugging Face Hub id of the checkpoint to load.
        target_device: Torch device string the model is moved to.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    tts_model = ParlerTTSForConditionalGeneration.from_pretrained(model_id)
    tts_model = tts_model.to(target_device)
    tts_tokenizer = AutoTokenizer.from_pretrained(model_id)
    return tts_model, tts_tokenizer


# Load the model and tokenizer (cached across reruns and sessions).
model, tokenizer = _load_tts(_MODEL_ID, device)
# Neon-themed styling: inject a raw <style> element into the page.
# The CSS is kept flush-left on purpose; unsafe_allow_html is required for
# Streamlit to pass the <style> tag through instead of escaping it.
st.markdown("""
<style>
body {
background-color: #0f0f0f;
color: #0fff0f;
}
.stTextInput, .stTextArea {
background-color: #333333;
color: #0fff0f;
}
.stButton > button {
background-color: #0fff0f;
color: #0f0f0f;
}
</style>
""", unsafe_allow_html=True)

# Page title. The leading emoji had been corrupted into the mojibake "π€"
# (a UTF-8 emoji decoded with a Greek single-byte code page); restored here.
# NOTE(review): the microphone emoji is the most plausible original for a
# TTS app given the surviving bytes — confirm against the upstream source.
st.title("🎤 Neon TTS Converter")
# Preset voices: display name -> natural-language speaker description.
# The selected description is what gets tokenized as the model's
# "description" input when speech is generated.
voices = {
    "Smooth Female": "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch.",
    "Monotone Male": "Jon's voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise.",
    "Energetic Youth": "An energetic young speaker with a lively tone and rapid speech, creating a sense of excitement.",
    "Calm Elderly": "An elderly speaker with a calm and slow-paced voice, bringing wisdom and serenity to the speech.",
    "Robotic": "A robotic, artificial voice with a consistent pitch and no variation in tone.",
    "Narrator": "A deep and clear voice, with a strong presence and a slightly slower pace, suitable for narrations.",
    "Whisper": "A soft, whispered voice, with very low volume and an intimate tone.",
    "Formal": "A formal, authoritative voice with clear articulation and a steady pace.",
    "Happy": "A cheerful, upbeat voice with a positive tone and lively intonation.",
    "Mysterious": "A mysterious and low-pitched voice, with slow delivery and a sense of intrigue.",
    "Bass-Heavy Male": "A deep, resonant male voice with a strong bass, ideal for dramatic and powerful delivery.",
    "Actor Voice 1": "An actor's voice with a dynamic range, capable of various emotional tones and expressions.",
    "Actor Voice 2": "A distinct and engaging actor's voice, providing a unique flair and character to the speech.",
}

# Sidebar controls, created in the same top-to-bottom order as they appear:
# voice picker, its description, then the free-text prompt.
with st.sidebar:
    st.header("Select Voice")
    voice_choice = st.selectbox("Choose a Voice", list(voices))

    # Show the description that belongs to the currently selected voice.
    st.markdown(f"**Description:** {voices[voice_choice]}")

    st.header("Custom Prompt")
    prompt = st.text_area(
        "Enter your custom prompt",
        value="Hey, how are you doing today?",
    )
# Generate the TTS output when the sidebar button is clicked.
if st.sidebar.button("Generate Speech"):
    if not prompt.strip():
        # Nothing to speak: stop before the expensive generate call.
        st.warning("Please enter some text to convert to speech.")
    else:
        # Top-level UI boundary: report any failure in the page instead of
        # letting the script crash with a traceback.
        try:
            description = voices[voice_choice]

            # Tokenize each text once and reuse both the ids and the
            # attention mask from the same encoding.
            description_inputs = tokenizer(description, return_tensors="pt").to(device)
            prompt_inputs = tokenizer(prompt, return_tensors="pt").to(device)

            # Generate speech (can take a while, so show progress feedback).
            with st.spinner("Generating speech..."):
                generation = model.generate(
                    input_ids=description_inputs.input_ids,
                    prompt_input_ids=prompt_inputs.input_ids,
                    attention_mask=description_inputs.attention_mask,
                    prompt_attention_mask=prompt_inputs.attention_mask,
                )
            audio_arr = generation.cpu().numpy().squeeze()

            # Encode the WAV in memory rather than writing a fixed file name:
            # a shared path would be overwritten by concurrent sessions and
            # leaves a stray file on disk.
            wav_buffer = io.BytesIO()
            sf.write(wav_buffer, audio_arr, model.config.sampling_rate, format="WAV")

            # Display the audio player
            st.audio(wav_buffer.getvalue(), format="audio/wav")
            st.success("Speech generation complete!")
        except Exception as e:
            st.error(f"An error occurred: {e}")