Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -682,6 +682,9 @@ import librosa
|
|
682 |
from pathlib import Path
|
683 |
import torchaudio
|
684 |
|
|
|
|
|
|
|
685 |
# Check if the token is already set in the environment variables
|
686 |
hf_token = os.getenv("HF_TOKEN")
|
687 |
if hf_token is None:
|
@@ -914,6 +917,8 @@ def bot(history, choice, tts_choice):
|
|
914 |
audio_future = executor.submit(generate_audio_parler_tts, response)
|
915 |
elif tts_choice == "MARS5":
|
916 |
audio_future = executor.submit(generate_audio_mars5, response)
|
|
|
|
|
917 |
elif tts_choice == "Toucan TTS":
|
918 |
audio_future = executor.submit(generate_audio_toucan_tts, response)
|
919 |
|
@@ -1236,6 +1241,26 @@ def generate_audio_mars5(text):
|
|
1236 |
logging.debug(f"Audio saved to {combined_audio_path}")
|
1237 |
return combined_audio_path
|
1238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1239 |
|
1240 |
def float2pcm(sig, dtype='int16'):
|
1241 |
"""
|
@@ -1374,7 +1399,7 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
|
|
1374 |
gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
|
1375 |
chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
|
1376 |
chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
|
1377 |
-
tts_choice = gr.Radio(label="Select TTS System", choices=["Eleven Labs", "Parler-TTS", "MARS5","Toucan TTS"], value="Eleven Labs")
|
1378 |
bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
|
1379 |
bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
|
1380 |
chatbot.like(print_like_dislike, None, None)
|
@@ -1402,6 +1427,7 @@ demo.launch(share=True)
|
|
1402 |
|
1403 |
|
1404 |
|
|
|
1405 |
# import gradio as gr
|
1406 |
# import requests
|
1407 |
# import os
|
|
|
682 |
from pathlib import Path
|
683 |
import torchaudio
|
684 |
|
685 |
+
# Import the Toucan TTS dependencies
|
686 |
+
from InferenceInterfaces.Meta_FastSpeech2 import Meta_FastSpeech2
|
687 |
+
|
688 |
# Check if the token is already set in the environment variables
|
689 |
hf_token = os.getenv("HF_TOKEN")
|
690 |
if hf_token is None:
|
|
|
917 |
audio_future = executor.submit(generate_audio_parler_tts, response)
|
918 |
elif tts_choice == "MARS5":
|
919 |
audio_future = executor.submit(generate_audio_mars5, response)
|
920 |
+
elif tts_choice == "Meta Voice":
|
921 |
+
audio_future = executor.submit(generate_audio_meta_voice, response)
|
922 |
elif tts_choice == "Toucan TTS":
|
923 |
audio_future = executor.submit(generate_audio_toucan_tts, response)
|
924 |
|
|
|
1241 |
logging.debug(f"Audio saved to {combined_audio_path}")
|
1242 |
return combined_audio_path
|
1243 |
|
1244 |
+
def generate_audio_meta_voice(text):
    """Synthesize speech for ``text`` and return the path of the combined WAV.

    The text is passed through ``preprocess`` and ``chunk_text``; each chunk
    is generated with ``parler_model`` conditioned on a fixed speaker
    description, and the per-chunk audio is concatenated in chunk order.

    Args:
        text: The text to be spoken.

    Returns:
        Path of the combined WAV file in the system temp directory. If the
        text yields no chunks, the file contains an empty audio segment.
    """
    description = "Thomas speaks with emphasis and excitement at a moderate pace with high quality."
    # Parler-TTS takes the speaker description as `input_ids` and the text to
    # speak as `prompt_input_ids`. The description is the same for every
    # chunk, so it is tokenized once, outside the loop.
    description_ids = parler_tokenizer(description, return_tensors="pt").input_ids.to(device)

    audio_segments = []
    # Chunk files go into a private directory: calls running at the same time
    # cannot overwrite each other's chunks, and the files are removed as soon
    # as they have been loaded into memory.
    with tempfile.TemporaryDirectory() as chunk_dir:
        for index, chunk in enumerate(chunk_text(preprocess(text))):
            prompt = parler_tokenizer(chunk, return_tensors="pt").to(device)
            generation = parler_model.generate(
                input_ids=description_ids,
                prompt_input_ids=prompt.input_ids,
            )
            audio_arr = generation.cpu().numpy().squeeze()

            temp_audio_path = os.path.join(chunk_dir, f"meta_voice_audio_{index}.wav")
            # NOTE(review): SAMPLE_RATE is defined elsewhere in the file;
            # confirm it matches the sampling rate parler_model produces.
            write_wav(temp_audio_path, SAMPLE_RATE, audio_arr)
            audio_segments.append(AudioSegment.from_wav(temp_audio_path))

    # An explicit empty start value keeps the result an AudioSegment even when
    # there are no chunks (plain sum([]) would return the integer 0).
    combined_audio = sum(audio_segments, AudioSegment.empty())
    combined_audio_path = os.path.join(tempfile.gettempdir(), "meta_voice_combined_audio.wav")
    combined_audio.export(combined_audio_path, format="wav")

    logging.debug(f"Audio saved to {combined_audio_path}")
    return combined_audio_path
|
1264 |
|
1265 |
def float2pcm(sig, dtype='int16'):
|
1266 |
"""
|
|
|
1399 |
gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
|
1400 |
chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
|
1401 |
chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
|
1402 |
+
tts_choice = gr.Radio(label="Select TTS System", choices=["Eleven Labs", "Parler-TTS", "MARS5", "Meta Voice", "Toucan TTS"], value="Eleven Labs")
|
1403 |
bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
|
1404 |
bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
|
1405 |
chatbot.like(print_like_dislike, None, None)
|
|
|
1427 |
|
1428 |
|
1429 |
|
1430 |
+
|
1431 |
# import gradio as gr
|
1432 |
# import requests
|
1433 |
# import os
|