IT2091024v2

Paused

App Files Files Community

Pijush2023 committed on Jul 8, 2024

Commit

3f3507e

verified ·

1 Parent(s): 2ba7c8c

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -1

app.py CHANGED Viewed

@@ -682,6 +682,9 @@ import librosa
 from pathlib import Path
 import torchaudio
 # Check if the token is already set in the environment variables
 hf_token = os.getenv("HF_TOKEN")
 if hf_token is None:
@@ -914,6 +917,8 @@ def bot(history, choice, tts_choice):
             audio_future = executor.submit(generate_audio_parler_tts, response)
         elif tts_choice == "MARS5":
             audio_future = executor.submit(generate_audio_mars5, response)
         elif tts_choice == "Toucan TTS":
             audio_future = executor.submit(generate_audio_toucan_tts, response)
@@ -1236,6 +1241,26 @@ def generate_audio_mars5(text):
     logging.debug(f"Audio saved to {combined_audio_path}")
     return combined_audio_path
 def float2pcm(sig, dtype='int16'):
     """
@@ -1374,7 +1399,7 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
             gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
             chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
             chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
-            tts_choice = gr.Radio(label="Select TTS System", choices=["Eleven Labs", "Parler-TTS", "MARS5","Toucan TTS"], value="Eleven Labs")
             bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
             bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
             chatbot.like(print_like_dislike, None, None)
@@ -1402,6 +1427,7 @@ demo.launch(share=True)
 # import gradio as gr
 # import requests
 # import os

 from pathlib import Path
 import torchaudio
+# Import the Toucan TTS dependencies
+from InferenceInterfaces.Meta_FastSpeech2 import Meta_FastSpeech2
 # Check if the token is already set in the environment variables
 hf_token = os.getenv("HF_TOKEN")
 if hf_token is None:
             audio_future = executor.submit(generate_audio_parler_tts, response)
         elif tts_choice == "MARS5":
             audio_future = executor.submit(generate_audio_mars5, response)
+        elif tts_choice == "Meta Voice":
+            audio_future = executor.submit(generate_audio_meta_voice, response)
         elif tts_choice == "Toucan TTS":
             audio_future = executor.submit(generate_audio_toucan_tts, response)
     logging.debug(f"Audio saved to {combined_audio_path}")
     return combined_audio_path
+def generate_audio_meta_voice(text):
+    """Synthesize speech for ``text`` and return the path of the combined WAV file.
+
+    The text is run through ``preprocess`` and ``chunk_text``; each chunk is
+    synthesized separately, written to its own temporary WAV file, and the
+    pieces are concatenated into one file in the system temp directory.
+
+    Note that, despite the "Meta Voice" name, generation goes through
+    ``parler_tokenizer`` / ``parler_model``.
+
+    Args:
+        text: The response text to speak.
+
+    Returns:
+        Path of the combined WAV file.
+    """
+    description = "Thomas speaks with emphasis and excitement at a moderate pace with high quality."
+    # The voice description must be tokenized and handed to ``generate`` as
+    # ``input_ids``; previously it was defined but never used, so the
+    # requested speaking style had no effect on the output.
+    description_ids = parler_tokenizer(description, return_tensors="pt").input_ids.to(device)
+    chunks = chunk_text(preprocess(text))
+    audio_segments = []
+    for index, chunk in enumerate(chunks):
+        prompt_ids = parler_tokenizer(chunk, return_tensors="pt").input_ids.to(device)
+        generation = parler_model.generate(input_ids=description_ids, prompt_input_ids=prompt_ids)
+        audio_arr = generation.cpu().numpy().squeeze()
+        temp_audio_path = os.path.join(tempfile.gettempdir(), f"meta_voice_audio_{index}.wav")
+        # NOTE(review): SAMPLE_RATE is defined elsewhere in the file; confirm it
+        # matches the sampling rate of parler_model's output.
+        write_wav(temp_audio_path, SAMPLE_RATE, audio_arr)
+        audio_segments.append(AudioSegment.from_wav(temp_audio_path))
+    # Summing the segments concatenates them in chunk order.
+    combined_audio = sum(audio_segments)
+    combined_audio_path = os.path.join(tempfile.gettempdir(), "meta_voice_combined_audio.wav")
+    combined_audio.export(combined_audio_path, format="wav")
+    logging.debug(f"Audio saved to {combined_audio_path}")
+    return combined_audio_path
 def float2pcm(sig, dtype='int16'):
     """
             gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
             chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
             chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
+            tts_choice = gr.Radio(label="Select TTS System", choices=["Eleven Labs", "Parler-TTS", "MARS5", "Meta Voice", "Toucan TTS"], value="Eleven Labs")
             bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
             bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
             chatbot.like(print_like_dislike, None, None)
 # import gradio as gr
 # import requests
 # import os