Spaces: Running on Zero
Add MCP server (#6)
Browse files
- Add MCP server (778db97e13636a827e72e88a60aaf9960bb3b2d7)
Co-authored-by: Apolinário from multimodal AI art <[email protected]>
app.py
CHANGED
@@ -212,7 +212,16 @@ def normalize_text(transcript: str):
|
|
212 |
|
213 |
@spaces.GPU
|
214 |
def initialize_engine(model_path, audio_tokenizer_path) -> bool:
|
215 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
global engine
|
217 |
try:
|
218 |
logger.info(f"Initializing engine with model: {model_path} and audio tokenizer: {audio_tokenizer_path}")
|
@@ -301,7 +310,26 @@ def text_to_speech(
|
|
301 |
ras_win_len=7,
|
302 |
ras_win_max_num_repeat=2,
|
303 |
):
|
304 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
global engine
|
306 |
|
307 |
if engine is None:
|
@@ -518,6 +546,15 @@ def create_ui():
|
|
518 |
|
519 |
# Function to play voice sample when clicking on a row
|
520 |
def play_voice_sample(evt: gr.SelectData):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
521 |
try:
|
522 |
# Get the preset name from the clicked row
|
523 |
preset_names = [preset for preset in VOICE_PRESETS.keys() if preset != "EMPTY"]
|
@@ -541,6 +578,16 @@ def create_ui():
|
|
541 |
|
542 |
# Function to handle template selection
|
543 |
def apply_template(template_name):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
544 |
if template_name in PREDEFINED_EXAMPLES:
|
545 |
template = PREDEFINED_EXAMPLES[template_name]
|
546 |
# Enable voice preset and custom reference only for voice-clone template
|
@@ -642,8 +689,8 @@ def main():
|
|
642 |
|
643 |
# Create and launch the UI
|
644 |
demo = create_ui()
|
645 |
-
demo.launch(server_name=args.host, server_port=args.port)
|
646 |
|
647 |
|
648 |
if __name__ == "__main__":
|
649 |
-
main()
|
|
|
212 |
|
213 |
@spaces.GPU
|
214 |
def initialize_engine(model_path, audio_tokenizer_path) -> bool:
|
215 |
+
"""
|
216 |
+
Initialize the HiggsAudioServeEngine with the specified model and tokenizer.
|
217 |
+
|
218 |
+
Args:
|
219 |
+
model_path: Path to the model to load
|
220 |
+
audio_tokenizer_path: Path to the audio tokenizer to load
|
221 |
+
|
222 |
+
Returns:
|
223 |
+
True if initialization was successful, False otherwise
|
224 |
+
"""
|
225 |
global engine
|
226 |
try:
|
227 |
logger.info(f"Initializing engine with model: {model_path} and audio tokenizer: {audio_tokenizer_path}")
|
|
|
310 |
ras_win_len=7,
|
311 |
ras_win_max_num_repeat=2,
|
312 |
):
|
313 |
+
"""
|
314 |
+
Convert text to speech using HiggsAudioServeEngine.
|
315 |
+
|
316 |
+
Args:
|
317 |
+
text: The text to convert to speech
|
318 |
+
voice_preset: The voice preset to use (or "EMPTY" for no preset)
|
319 |
+
reference_audio: Optional path to reference audio file
|
320 |
+
reference_text: Optional transcript of the reference audio
|
321 |
+
max_completion_tokens: Maximum number of tokens to generate
|
322 |
+
temperature: Sampling temperature for generation
|
323 |
+
top_p: Top-p sampling parameter
|
324 |
+
top_k: Top-k sampling parameter
|
325 |
+
system_prompt: System prompt to guide the model
|
326 |
+
stop_strings: Dataframe containing stop strings
|
327 |
+
ras_win_len: Window length for repetition avoidance sampling
|
328 |
+
ras_win_max_num_repeat: Maximum number of repetitions allowed in the window
|
329 |
+
|
330 |
+
Returns:
|
331 |
+
Tuple of (generated_text, (sample_rate, audio_data)) where audio_data is int16 numpy array
|
332 |
+
"""
|
333 |
global engine
|
334 |
|
335 |
if engine is None:
|
|
|
546 |
|
547 |
# Function to play voice sample when clicking on a row
|
548 |
def play_voice_sample(evt: gr.SelectData):
|
549 |
+
"""
|
550 |
+
Play a voice sample when a row is clicked in the voice samples table.
|
551 |
+
|
552 |
+
Args:
|
553 |
+
evt: The select event containing the clicked row index
|
554 |
+
|
555 |
+
Returns:
|
556 |
+
Path to the voice sample audio file, or None if not found
|
557 |
+
"""
|
558 |
try:
|
559 |
# Get the preset name from the clicked row
|
560 |
preset_names = [preset for preset in VOICE_PRESETS.keys() if preset != "EMPTY"]
|
|
|
578 |
|
579 |
# Function to handle template selection
|
580 |
def apply_template(template_name):
|
581 |
+
"""
|
582 |
+
Apply a predefined template to the UI components.
|
583 |
+
|
584 |
+
Args:
|
585 |
+
template_name: Name of the template to apply
|
586 |
+
|
587 |
+
Returns:
|
588 |
+
Tuple of updated values for system_prompt, input_text, template_description,
|
589 |
+
voice_preset, custom_reference_accordion, voice_samples_section, and ras_win_len
|
590 |
+
"""
|
591 |
if template_name in PREDEFINED_EXAMPLES:
|
592 |
template = PREDEFINED_EXAMPLES[template_name]
|
593 |
# Enable voice preset and custom reference only for voice-clone template
|
|
|
689 |
|
690 |
# Create and launch the UI
|
691 |
demo = create_ui()
|
692 |
+
demo.launch(server_name=args.host, server_port=args.port, mcp_server=True)
|
693 |
|
694 |
|
695 |
if __name__ == "__main__":
|
696 |
+
main()
|