smola multimodalart HF Staff committed on
Commit
5c4e24a
·
verified ·
1 Parent(s): 355d056

Add MCP server (#6)

Browse files

- Add MCP server (778db97e13636a827e72e88a60aaf9960bb3b2d7)


Co-authored-by: Apolinário from multimodal AI art <[email protected]>

Files changed (1) hide show
  1. app.py +51 -4
app.py CHANGED
@@ -212,7 +212,16 @@ def normalize_text(transcript: str):
212
 
213
  @spaces.GPU
214
  def initialize_engine(model_path, audio_tokenizer_path) -> bool:
215
- """Initialize the HiggsAudioServeEngine."""
 
 
 
 
 
 
 
 
 
216
  global engine
217
  try:
218
  logger.info(f"Initializing engine with model: {model_path} and audio tokenizer: {audio_tokenizer_path}")
@@ -301,7 +310,26 @@ def text_to_speech(
301
  ras_win_len=7,
302
  ras_win_max_num_repeat=2,
303
  ):
304
- """Convert text to speech using HiggsAudioServeEngine."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  global engine
306
 
307
  if engine is None:
@@ -518,6 +546,15 @@ def create_ui():
518
 
519
  # Function to play voice sample when clicking on a row
520
  def play_voice_sample(evt: gr.SelectData):
 
 
 
 
 
 
 
 
 
521
  try:
522
  # Get the preset name from the clicked row
523
  preset_names = [preset for preset in VOICE_PRESETS.keys() if preset != "EMPTY"]
@@ -541,6 +578,16 @@ def create_ui():
541
 
542
  # Function to handle template selection
543
  def apply_template(template_name):
 
 
 
 
 
 
 
 
 
 
544
  if template_name in PREDEFINED_EXAMPLES:
545
  template = PREDEFINED_EXAMPLES[template_name]
546
  # Enable voice preset and custom reference only for voice-clone template
@@ -642,8 +689,8 @@ def main():
642
 
643
  # Create and launch the UI
644
  demo = create_ui()
645
- demo.launch(server_name=args.host, server_port=args.port)
646
 
647
 
648
  if __name__ == "__main__":
649
- main()
 
212
 
213
  @spaces.GPU
214
  def initialize_engine(model_path, audio_tokenizer_path) -> bool:
215
+ """
216
+ Initialize the HiggsAudioServeEngine with the specified model and tokenizer.
217
+
218
+ Args:
219
+ model_path: Path to the model to load
220
+ audio_tokenizer_path: Path to the audio tokenizer to load
221
+
222
+ Returns:
223
+ True if initialization was successful, False otherwise
224
+ """
225
  global engine
226
  try:
227
  logger.info(f"Initializing engine with model: {model_path} and audio tokenizer: {audio_tokenizer_path}")
 
310
  ras_win_len=7,
311
  ras_win_max_num_repeat=2,
312
  ):
313
+ """
314
+ Convert text to speech using HiggsAudioServeEngine.
315
+
316
+ Args:
317
+ text: The text to convert to speech
318
+ voice_preset: The voice preset to use (or "EMPTY" for no preset)
319
+ reference_audio: Optional path to reference audio file
320
+ reference_text: Optional transcript of the reference audio
321
+ max_completion_tokens: Maximum number of tokens to generate
322
+ temperature: Sampling temperature for generation
323
+ top_p: Top-p sampling parameter
324
+ top_k: Top-k sampling parameter
325
+ system_prompt: System prompt to guide the model
326
+ stop_strings: Dataframe containing stop strings
327
+ ras_win_len: Window length for repetition avoidance sampling
328
+ ras_win_max_num_repeat: Maximum number of repetitions allowed in the window
329
+
330
+ Returns:
331
+ Tuple of (generated_text, (sample_rate, audio_data)) where audio_data is an int16 NumPy array
332
+ """
333
  global engine
334
 
335
  if engine is None:
 
546
 
547
  # Function to play voice sample when clicking on a row
548
  def play_voice_sample(evt: gr.SelectData):
549
+ """
550
+ Play a voice sample when a row is clicked in the voice samples table.
551
+
552
+ Args:
553
+ evt: The select event containing the clicked row index
554
+
555
+ Returns:
556
+ Path to the voice sample audio file, or None if not found
557
+ """
558
  try:
559
  # Get the preset name from the clicked row
560
  preset_names = [preset for preset in VOICE_PRESETS.keys() if preset != "EMPTY"]
 
578
 
579
  # Function to handle template selection
580
  def apply_template(template_name):
581
+ """
582
+ Apply a predefined template to the UI components.
583
+
584
+ Args:
585
+ template_name: Name of the template to apply
586
+
587
+ Returns:
588
+ Tuple of updated values for system_prompt, input_text, template_description,
589
+ voice_preset, custom_reference_accordion, voice_samples_section, and ras_win_len
590
+ """
591
  if template_name in PREDEFINED_EXAMPLES:
592
  template = PREDEFINED_EXAMPLES[template_name]
593
  # Enable voice preset and custom reference only for voice-clone template
 
689
 
690
  # Create and launch the UI
691
  demo = create_ui()
692
+ demo.launch(server_name=args.host, server_port=args.port, mcp_server=True)
693
 
694
 
695
  if __name__ == "__main__":
696
+ main()