"""
app.py
This file defines the Gradio user interface for interacting with the Anthropic API, Hume TTS API, and ElevenLabs TTS API.
Users can input prompts, which are processed to generate text using the Claude model via the Anthropic API.
The generated text is then converted to audio using both Hume and ElevenLabs TTS APIs, allowing playback in the Gradio UI.
"""
# Standard Library Imports
from concurrent.futures import ThreadPoolExecutor
import random

# Third-Party Library Imports
import gradio as gr

# Local Application Imports
from src.config import logger
from src.constants import PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH, SAMPLE_PROMPTS
from src.integrations import generate_text_with_claude, text_to_speech_with_hume, text_to_speech_with_elevenlabs
from src.utils import truncate_text, validate_prompt_length


def process_prompt(prompt: str):
    """
    Processes the user input by generating text with the Claude API, then converting
    the generated text to speech using both the Hume and ElevenLabs TTS APIs.

    Args:
        prompt (str): The user's input prompt.

    Returns:
        tuple: Generated text, two audio paths (Hume & ElevenLabs), and a mapping
            of audio options to their respective TTS providers.
    """
    logger.info(f'Processing prompt: {truncate_text(prompt, max_length=100)}')
    try:
        # Validate prompt length
        validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)

        # Generate text
        generated_text = generate_text_with_claude(prompt)
        logger.info(f'Generated text successfully (length={len(generated_text)} characters).')

        # Run TTS generation in parallel
        with ThreadPoolExecutor(max_workers=2) as executor:
            hume_future = executor.submit(text_to_speech_with_hume, prompt, generated_text)
            elevenlabs_future = executor.submit(text_to_speech_with_elevenlabs, generated_text)

            # Retrieve results
            hume_audio = hume_future.result()
            elevenlabs_audio = elevenlabs_future.result()

        logger.info(
            f'TTS audio generated: Hume={len(hume_audio)} bytes, '
            f'ElevenLabs={len(elevenlabs_audio)} bytes'
        )

        # Randomly assign audio options
        audio_options = [
            (hume_audio, 'Hume TTS'),
            (elevenlabs_audio, 'ElevenLabs TTS'),
        ]
        random.shuffle(audio_options)
        option1_audio, option1_provider = audio_options[0]
        option2_audio, option2_provider = audio_options[1]

        return generated_text, option1_audio, option2_audio, {
            'Option 1': option1_provider,
            'Option 2': option2_provider,
        }
    except ValueError as ve:
        logger.warning(f'Validation error: {ve}')
        return str(ve), None, None, {}
    except Exception as e:
        logger.error(f'Unexpected error during processing: {e}')
        return 'An unexpected error occurred. Please try again.', None, None, {}


def run_process_prompt(prompt: str):
    """
    Handles the UI state transitions while processing a prompt.

    Args:
        prompt (str): The user's input prompt.

    Yields:
        tuple: Updates to the UI elements in two stages:
            1. Disabling the UI and clearing previous outputs.
            2. Displaying the generated content and re-enabling the UI.
    """
    # Stage 1: Disable UI and clear previous outputs
    yield (
        gr.update(interactive=False),  # Disable Generate Button
        gr.update(value=None),  # Clear generated text
        gr.update(value=None),  # Clear Option 1 audio
        gr.update(value=None),  # Clear Option 2 audio
        gr.update(value=None),  # Clear option mapping
        None,  # Reset Option 2 audio state
    )

    # Process the prompt
    generated_text, option1_audio, option2_audio, option_mapping = process_prompt(prompt)

    # Stage 2: Display generated text and first audio (autoplay)
    yield (
        gr.update(interactive=True),  # Enable Generate Button
        gr.update(value=generated_text),  # Show generated text
        gr.update(value=option1_audio, autoplay=True),  # Set Option 1 audio
        gr.update(value=option2_audio),  # Set Option 2 audio
        gr.update(value=option_mapping),  # Store option mapping
        option2_audio,  # Store Option 2 audio
    )


def build_gradio_interface() -> gr.Blocks:
    """
    Constructs the Gradio user interface.

    Returns:
        gr.Blocks: The Gradio Blocks-based UI.
    """
    with gr.Blocks() as demo:
        # UI title & instructions
        gr.Markdown('# TTS Arena')
        gr.Markdown(
            'Generate text from a prompt using **Claude by Anthropic**, '
            'and compare text-to-speech outputs from **Hume TTS API** and **ElevenLabs TTS API**.'
        )

        # Prompt selection
        with gr.Row():
            sample_prompt_dropdown = gr.Dropdown(
                choices=list(SAMPLE_PROMPTS.keys()),
                label='Choose a sample prompt (or enter your own below)',
                value=None,
                interactive=True,
            )

        # Prompt input
        with gr.Row():
            prompt_input = gr.Textbox(
                label='Enter your prompt',
                placeholder='Or type your own prompt here...',
                lines=2,
                max_lines=2,
            )

        # Generate button
        with gr.Row():
            generate_button = gr.Button('Generate')

        # Output section
        with gr.Column():
            output_text = gr.Textbox(
                label='Generated Text',
                interactive=False,
                lines=8,
                max_lines=12,
            )
            with gr.Row():
                option1_audio_player = gr.Audio(label='Option 1', type='filepath', interactive=False)
                option2_audio_player = gr.Audio(label='Option 2', type='filepath', interactive=False)

        # UI state components
        option_mapping_state = gr.State()
        option2_audio_state = gr.State()

        # Event handlers
        sample_prompt_dropdown.change(
            fn=lambda choice: SAMPLE_PROMPTS.get(choice, ''),
            inputs=[sample_prompt_dropdown],
            outputs=[prompt_input],
        )
        generate_button.click(
            fn=run_process_prompt,
            inputs=[prompt_input],
            outputs=[
                generate_button,
                output_text,
                option1_audio_player,
                option2_audio_player,
                option_mapping_state,
                option2_audio_state,
            ],
        )
        # Auto-play the second audio option after the first finishes playing
        option1_audio_player.stop(
            fn=lambda _: gr.update(value=None),  # Clear Option 2 audio so setting it again triggers autoplay
            inputs=[option1_audio_player],
            outputs=[option2_audio_player],
        ).then(
            fn=lambda option2_audio: gr.update(value=option2_audio, autoplay=True),
            inputs=[option2_audio_state],
            outputs=[option2_audio_player],
        )

    logger.debug('Gradio interface built successfully')
    return demo
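

# Usage note: running this module directly launches the app locally. `demo.launch()`
# also accepts optional arguments (for example `share=True` for a temporary public
# link); these are not needed when the app runs as a Hugging Face Space.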


if __name__ == '__main__':
    logger.info('Launching TTS Arena Gradio app...')
    demo = build_gradio_interface()
    demo.launch()