Spaces:
Running
Running
zach
committed on
Commit
·
1a6c67a
1
Parent(s):
e91a94a
Simplify integration logic in app.py
Browse files
- src/app.py +15 -50
- src/integrations/hume_api.py +1 -2
- src/utils.py +8 -19
src/app.py
CHANGED
|
@@ -30,9 +30,9 @@ from src.integrations import (
|
|
| 30 |
text_to_speech_with_hume,
|
| 31 |
)
|
| 32 |
from src.utils import (
|
| 33 |
-
choose_providers,
|
| 34 |
create_shuffled_tts_options,
|
| 35 |
determine_selected_option,
|
|
|
|
| 36 |
submit_voting_results,
|
| 37 |
validate_character_description_length,
|
| 38 |
validate_text_length,
|
|
@@ -124,68 +124,31 @@ class App:
|
|
| 124 |
logger.warning(f"Validation error: {ve}")
|
| 125 |
raise gr.Error(str(ve))
|
| 126 |
|
| 127 |
-
# Select 2 TTS providers based on whether the text has been modified.
|
| 128 |
text_modified = text != generated_text_state
|
| 129 |
-
provider_a
|
|
|
|
| 130 |
|
| 131 |
try:
|
| 132 |
-
start_time = time.time()
|
| 133 |
logger.info(f"Starting speech synthesis with providers: {provider_a} and {provider_b}")
|
|
|
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
result = await text_to_speech_with_hume(character_description, text, num_generations, self.config)
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
raise ValueError("Expected 4 values from Hume TTS call when generating 2 outputs")
|
| 144 |
|
| 145 |
-
|
| 146 |
-
logger.info(f"Completed dual Hume synthesis in {time.time() - start_time:.2f} seconds")
|
| 147 |
-
else:
|
| 148 |
-
# Process API calls sequentially to avoid resource contention
|
| 149 |
-
logger.info(f"Sequential processing: First generating audio with {provider_a}")
|
| 150 |
|
| 151 |
-
# Generate a single Hume output
|
| 152 |
-
num_generations = 1
|
| 153 |
-
result_a = await text_to_speech_with_hume(character_description, text, num_generations, self.config)
|
| 154 |
-
|
| 155 |
-
if not isinstance(result_a, tuple) or len(result_a) != 2:
|
| 156 |
-
raise ValueError("Expected 2 values from Hume TTS call when generating 1 output")
|
| 157 |
-
|
| 158 |
-
generation_id_a, audio_a = result_a[0], result_a[1]
|
| 159 |
-
logger.info(f"First audio generated in {time.time() - start_time:.2f} seconds")
|
| 160 |
-
|
| 161 |
-
# Generate a second TTS output from the second provider
|
| 162 |
-
logger.info(f"Now generating audio with {provider_b}")
|
| 163 |
-
second_start = time.time()
|
| 164 |
-
|
| 165 |
-
match provider_b:
|
| 166 |
-
case constants.ELEVENLABS:
|
| 167 |
-
result_b = await text_to_speech_with_elevenlabs(character_description, text, self.config)
|
| 168 |
-
case _:
|
| 169 |
-
# Additional TTS Providers can be added here.
|
| 170 |
-
raise ValueError(f"Unsupported provider: {provider_b}")
|
| 171 |
-
|
| 172 |
-
generation_id_b, audio_b = result_b[0], result_b[1]
|
| 173 |
-
|
| 174 |
-
logger.info(f"Second audio generated in {time.time() - second_start:.2f} seconds")
|
| 175 |
-
logger.info(f"Total synthesis time: {time.time() - start_time:.2f} seconds")
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
# Shuffle options so that placement of options in the UI will always be random.
|
| 179 |
option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
|
| 180 |
option_b = Option(provider=provider_b, audio=audio_b, generation_id=generation_id_b)
|
| 181 |
options_map: OptionMap = create_shuffled_tts_options(option_a, option_b)
|
| 182 |
|
| 183 |
-
option_a_audio = options_map["option_a"]["audio_file_path"]
|
| 184 |
-
option_b_audio = options_map["option_b"]["audio_file_path"]
|
| 185 |
-
|
| 186 |
return (
|
| 187 |
-
gr.update(value=
|
| 188 |
-
gr.update(value=
|
| 189 |
options_map,
|
| 190 |
text_modified,
|
| 191 |
text,
|
|
@@ -194,9 +157,11 @@ class App:
|
|
| 194 |
except ElevenLabsError as ee:
|
| 195 |
logger.error(f"ElevenLabsError while synthesizing speech from text: {ee!s}")
|
| 196 |
raise gr.Error(f'There was an issue communicating with the Elevenlabs API: "{ee.message}"')
|
|
|
|
| 197 |
except HumeError as he:
|
| 198 |
logger.error(f"HumeError while synthesizing speech from text: {he!s}")
|
| 199 |
raise gr.Error(f'There was an issue communicating with the Hume API: "{he.message}"')
|
|
|
|
| 200 |
except Exception as e:
|
| 201 |
logger.error(f"Unexpected error during TTS generation: {e}")
|
| 202 |
raise gr.Error("An unexpected error occurred. Please try again later.")
|
|
|
|
| 30 |
text_to_speech_with_hume,
|
| 31 |
)
|
| 32 |
from src.utils import (
|
|
|
|
| 33 |
create_shuffled_tts_options,
|
| 34 |
determine_selected_option,
|
| 35 |
+
get_random_provider,
|
| 36 |
submit_voting_results,
|
| 37 |
validate_character_description_length,
|
| 38 |
validate_text_length,
|
|
|
|
| 124 |
logger.warning(f"Validation error: {ve}")
|
| 125 |
raise gr.Error(str(ve))
|
| 126 |
|
|
|
|
| 127 |
text_modified = text != generated_text_state
|
| 128 |
+
provider_a = constants.HUME_AI # always compare with Hume
|
| 129 |
+
provider_b = get_random_provider(text_modified)
|
| 130 |
|
| 131 |
try:
|
|
|
|
| 132 |
logger.info(f"Starting speech synthesis with providers: {provider_a} and {provider_b}")
|
| 133 |
+
generation_id_a, audio_a = await text_to_speech_with_hume(character_description, text, self.config)
|
| 134 |
|
| 135 |
+
tts_provider_funcs = {
|
| 136 |
+
constants.HUME_AI: text_to_speech_with_hume,
|
| 137 |
+
constants.ELEVENLABS: text_to_speech_with_elevenlabs,
|
| 138 |
+
}
|
|
|
|
| 139 |
|
| 140 |
+
if provider_b not in tts_provider_funcs:
|
| 141 |
+
raise ValueError(f"Unsupported provider: {provider_b}")
|
|
|
|
| 142 |
|
| 143 |
+
generation_id_b, audio_b = await tts_provider_funcs[provider_b](character_description, text, self.config)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
|
| 146 |
option_b = Option(provider=provider_b, audio=audio_b, generation_id=generation_id_b)
|
| 147 |
options_map: OptionMap = create_shuffled_tts_options(option_a, option_b)
|
| 148 |
|
|
|
|
|
|
|
|
|
|
| 149 |
return (
|
| 150 |
+
gr.update(value=options_map["option_a"]["audio_file_path"], visible=True, autoplay=True),
|
| 151 |
+
gr.update(value=options_map["option_b"]["audio_file_path"], visible=True),
|
| 152 |
options_map,
|
| 153 |
text_modified,
|
| 154 |
text,
|
|
|
|
| 157 |
except ElevenLabsError as ee:
|
| 158 |
logger.error(f"ElevenLabsError while synthesizing speech from text: {ee!s}")
|
| 159 |
raise gr.Error(f'There was an issue communicating with the Elevenlabs API: "{ee.message}"')
|
| 160 |
+
|
| 161 |
except HumeError as he:
|
| 162 |
logger.error(f"HumeError while synthesizing speech from text: {he!s}")
|
| 163 |
raise gr.Error(f'There was an issue communicating with the Hume API: "{he.message}"')
|
| 164 |
+
|
| 165 |
except Exception as e:
|
| 166 |
logger.error(f"Unexpected error during TTS generation: {e}")
|
| 167 |
raise gr.Error("An unexpected error occurred. Please try again later.")
|
src/integrations/hume_api.py
CHANGED
|
@@ -20,8 +20,7 @@ from typing import Tuple, Union
|
|
| 20 |
# Third-Party Library Imports
|
| 21 |
from hume import AsyncHumeClient
|
| 22 |
from hume.core.api_error import ApiError
|
| 23 |
-
from hume.tts import PostedUtterance
|
| 24 |
-
from hume.tts.types import Format, FormatMp3, ReturnTts
|
| 25 |
from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_exponential
|
| 26 |
|
| 27 |
# Local Application Imports
|
|
|
|
| 20 |
# Third-Party Library Imports
|
| 21 |
from hume import AsyncHumeClient
|
| 22 |
from hume.core.api_error import ApiError
|
| 23 |
+
from hume.tts.types import Format, FormatMp3, PostedUtterance, ReturnTts
|
|
|
|
| 24 |
from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_exponential
|
| 25 |
|
| 26 |
# Local Application Imports
|
src/utils.py
CHANGED
|
@@ -203,33 +203,22 @@ def save_base64_audio_to_file(base64_audio: str, filename: str, config: Config)
|
|
| 203 |
return str(relative_path)
|
| 204 |
|
| 205 |
|
| 206 |
-
def
|
| 207 |
"""
|
| 208 |
-
Select
|
| 209 |
-
|
| 210 |
-
The first provider is always set to "Hume AI". For the second provider:
|
| 211 |
-
- If the text has been modified or no character description is provided, it will be "Hume AI"
|
| 212 |
-
- Otherwise, it will be "Hume AI" 30% of the time and "ElevenLabs" 70% of the time
|
| 213 |
|
| 214 |
Args:
|
| 215 |
text_modified (bool): A flag indicating whether the text has been modified.
|
| 216 |
|
| 217 |
Returns:
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
"""
|
|
|
|
|
|
|
| 222 |
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
provider_a = constants.HUME_AI
|
| 226 |
-
|
| 227 |
-
if hume_comparison_only:
|
| 228 |
-
provider_b = constants.HUME_AI
|
| 229 |
-
else:
|
| 230 |
-
provider_b = constants.HUME_AI if random.random() < 0.3 else constants.ELEVENLABS
|
| 231 |
-
|
| 232 |
-
return provider_a, provider_b
|
| 233 |
|
| 234 |
|
| 235 |
def create_shuffled_tts_options(option_a: Option, option_b: Option) -> OptionMap:
|
|
|
|
| 203 |
return str(relative_path)
|
| 204 |
|
| 205 |
|
| 206 |
+
def get_random_provider(text_modified: bool) -> TTSProviderName:
|
| 207 |
"""
|
| 208 |
+
Select a TTS provider based on whether the text has been modified.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
Args:
|
| 211 |
text_modified (bool): A flag indicating whether the text has been modified.
|
| 212 |
|
| 213 |
Returns:
|
| 214 |
+
provider: A TTS provider selected based on the following criteria:
|
| 215 |
+
- If the text has been modified, it will be "Hume AI"
|
| 216 |
+
- Otherwise, it will be "Hume AI" 30% of the time and "ElevenLabs" 70% of the time
|
| 217 |
"""
|
| 218 |
+
if text_modified:
|
| 219 |
+
return constants.HUME_AI
|
| 220 |
|
| 221 |
+
return constants.HUME_AI if random.random() < 0.3 else constants.ELEVENLABS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
|
| 223 |
|
| 224 |
def create_shuffled_tts_options(option_a: Option, option_b: Option) -> OptionMap:
|