Spaces:

HumeAI
/

expressive-tts-arena

Running

expressive-tts-arena / src /integrations /hume_api.py

zach

raise friendlier error message to UI from integration code

7854f13 7 months ago

7.47 kB

	"""
	hume_api.py

	This file defines the interaction with the Hume text-to-speech (TTS) API using the
	Hume Python SDK. It includes functionality for API request handling and processing API responses.

	Key Features:
	- Encapsulates all logic related to the Hume TTS API.
	- Implements retry logic for handling transient API errors.
	- Handles received audio and processes it for playback on the web.
	- Provides detailed logging for debugging and error tracking.
	"""

	# Standard Library Imports
	import logging
	import time
	from dataclasses import dataclass, field
	from typing import Tuple, Union

	# Third-Party Library Imports
	from hume import AsyncHumeClient
	from hume.core.api_error import ApiError
	from hume.tts import PostedUtterance
	from hume.tts.types import ReturnGeneration, ReturnTts
	from hume.tts.types.format import Format, FormatMp3
	from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_exponential

	# Local Application Imports
	from src.config import Config, logger
	from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
	from src.utils import save_base64_audio_to_file, validate_env_var


	@dataclass(frozen=True)
	class HumeConfig:
	    """
	    Frozen settings bundle for talking to the Hume TTS API.

	    The API key is not a constructor argument: it is resolved through
	    ``validate_env_var("HUME_API_KEY")`` right after the instance is built.
	    """

	    # Populated in __post_init__; excluded from the generated __init__.
	    api_key: str = field(init=False)
	    # Audio format setting; each instance gets its own FormatMp3 unless one is passed in.
	    file_format: Format = field(default_factory=FormatMp3)
	    # Request timeout value (presumably seconds -- confirm against the Hume SDK).
	    request_timeout: float = 40.0

	    def __post_init__(self) -> None:
	        """Reject a missing file format, then resolve and store the API key."""
	        if not self.file_format:
	            raise ValueError("Hume TTS file format is not set.")

	        # The dataclass is frozen, so a normal attribute assignment would raise;
	        # object.__setattr__ is used to get around that guard.
	        object.__setattr__(self, "api_key", validate_env_var("HUME_API_KEY"))


	class HumeError(Exception):
	    """
	    Exception raised for failures related to the Hume TTS API.

	    Attributes:
	        message: Description of the failure; also passed on to ``Exception``.
	        original_exception: The underlying exception, or None when there is none.
	    """

	    def __init__(self, message: str, original_exception: Union[Exception, None] = None):
	        self.message = message
	        self.original_exception = original_exception
	        super().__init__(message)


	class UnretryableHumeError(HumeError):
	    """
	    Hume TTS API error that should not be retried.

	    The retry predicate on ``text_to_speech_with_hume`` skips exceptions of this
	    type, so raising it ends the retry loop immediately.

	    Args:
	        message: Description of the failure.
	        original_exception: The underlying exception, or None when there is none.
	    """

	    def __init__(self, message: str, original_exception: Union[Exception, None] = None):
	        # HumeError.__init__ already stores `message` and `original_exception`
	        # on the instance, so they are not assigned a second time here.
	        super().__init__(message, original_exception)


	@retry(
	    retry=retry_if_exception(lambda e: not isinstance(e, UnretryableHumeError)),
	    stop=stop_after_attempt(3),
	    wait=wait_exponential(multiplier=1, min=2, max=5),
	    before=before_log(logger, logging.DEBUG),
	    after=after_log(logger, logging.DEBUG),
	    reraise=True,
	)
	async def text_to_speech_with_hume(
	    character_description: str,
	    text: str,
	    config: Config,
	) -> Tuple[str, str]:
	    """
	    Asynchronously synthesizes speech using the Hume TTS API and writes the audio to a file.

	    A single utterance built from ``text`` and ``character_description`` is sent through the
	    Hume Python SDK. The first returned generation is handed to ``_parse_hume_tts_generation``,
	    which stores the audio and yields the generation ID together with the file path.

	    The call is attempted up to three times with exponential backoff; any exception other
	    than ``UnretryableHumeError`` triggers another attempt, and the last exception is re-raised.

	    Args:
	        character_description (str): Description used for voice synthesis. An empty value is
	            sent to the API as ``None``.
	        text (str): Text to be converted to speech.
	        config (Config): Application configuration; ``config.hume_config`` supplies the API key,
	            request timeout and audio format.

	    Returns:
	        Tuple[str, str]: A tuple containing:
	            - generation_id (str): Unique identifier for the generated audio.
	            - audio_file_path (str): Path to the saved audio file.

	    Raises:
	        UnretryableHumeError: When the API reports a status code in the range
	            ``CLIENT_ERROR_CODE <= status < SERVER_ERROR_CODE``.
	        HumeError: For every other failure, including API errors outside that range, an
	            empty list of generations, and unexpected exceptions (wrapped with the original
	            attached as ``original_exception``).
	    """
	    logger.debug(
	        "Processing TTS with Hume. Character description length: %d. Text length: %d.",
	        len(character_description),
	        len(text),
	    )

	    hume_config = config.hume_config

	    # Monotonic clock: the elapsed-time figures must not be skewed by wall-clock adjustments.
	    start_time = time.monotonic()
	    try:
	        hume_client = AsyncHumeClient(
	            api_key=hume_config.api_key,
	            timeout=hume_config.request_timeout,
	        )

	        utterance = PostedUtterance(
	            text=text,
	            description=character_description or None,
	        )

	        response: ReturnTts = await hume_client.tts.synthesize_json(
	            utterances=[utterance],
	            format=hume_config.file_format,
	        )

	        elapsed_time = time.monotonic() - start_time
	        logger.info("Hume API request completed in %.2f seconds", elapsed_time)

	        generations = response.generations
	        if not generations:
	            msg = "No generations returned by Hume API."
	            logger.error(msg)
	            raise HumeError(msg)

	        # Only the first generation is used.
	        return _parse_hume_tts_generation(generations[0], config)

	    except ApiError as e:
	        elapsed_time = time.monotonic() - start_time
	        logger.error("Hume API request failed after %.2f seconds: %s", elapsed_time, e)
	        clean_message = _extract_hume_api_error_message(e)

	        # Status codes in the client-error range will not succeed on a retry,
	        # so they are surfaced as the non-retryable error type.
	        status_code = getattr(e, "status_code", None)
	        if status_code is not None and CLIENT_ERROR_CODE <= status_code < SERVER_ERROR_CODE:
	            raise UnretryableHumeError(message=clean_message, original_exception=e) from e

	        raise HumeError(message=clean_message, original_exception=e) from e

	    except HumeError:
	        # Already a domain error (e.g. the "no generations" case above): re-raise it unchanged
	        # rather than logging it again and wrapping a HumeError inside another HumeError.
	        raise

	    except Exception as e:
	        error_type = type(e).__name__
	        # Some exceptions stringify to "", so fall back to naming the exception type.
	        error_message = str(e) or f"An error of type {error_type} occurred"
	        logger.error("Error during Hume API call: %s - %s", error_type, error_message)
	        raise HumeError(message=error_message, original_exception=e) from e


	def _parse_hume_tts_generation(generation: ReturnGeneration, config: Config) -> Tuple[str, str]:
	    """
	    Validates a Hume TTS generation and stores its audio as ``<generation_id>.mp3``.

	    Args:
	        generation (ReturnGeneration): Generation returned by the TTS API; its
	            ``generation_id`` and ``audio`` attributes are read.
	        config (Config): Application configuration, passed through to
	            ``save_base64_audio_to_file``.

	    Returns:
	        Tuple[str, str]: The generation ID and the path returned by
	        ``save_base64_audio_to_file``.

	    Raises:
	        KeyError: If ``generation_id`` or ``audio`` is missing or empty.
	        Exception: Anything raised while saving the audio file propagates unchanged.
	    """
	    generation_id = generation.generation_id
	    if not generation_id:
	        raise KeyError("The generation is missing the generation_id.")

	    encoded_audio = generation.audio
	    if not encoded_audio:
	        raise KeyError("The generation is missing the audio data.")

	    saved_path = save_base64_audio_to_file(encoded_audio, f"{generation_id}.mp3", config)
	    return generation_id, saved_path


	def _extract_hume_api_error_message(e: ApiError) -> str:
	    """
	    Extracts a clean, user-friendly error message from a Hume API error response.

	    The ``message`` entry of the error body is used only when the body is a dict and the
	    entry is a non-blank string. In every other case (no body, non-dict body, missing,
	    ``None``, empty or non-string message) a generic fallback sentence is returned, so the
	    caller always receives displayable text.

	    Args:
	        e (ApiError): The Hume API error exception; only its ``body`` attribute is inspected.

	    Returns:
	        str: The message from the error body, or the generic fallback message.
	    """
	    fallback_message = "An unknown error has occurred. Please try again later."

	    body = getattr(e, "body", None)
	    if not isinstance(body, dict):
	        return fallback_message

	    message = body.get("message")
	    # Guard against None, blank strings and nested objects: the result is shown to end
	    # users and the function is declared to return str.
	    if isinstance(message, str) and message.strip():
	        return message

	    return fallback_message