IT2091024v2

Paused

App Files Community

Pijush2023 committed on Sep 9, 2024

Commit

7f250f0

verified ·

1 Parent(s): 84d46a3

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -20

app.py CHANGED Viewed

@@ -444,31 +444,30 @@ def generate_tts_response(response, tts_choice):
 import concurrent.futures
-# Modified bot function to handle text and audio concurrently
-def bot(history, choice, tts_choice, retrieval_mode, model_choice):
     # Initialize an empty response
     response = ""
-    # Create a thread pool to handle both text generation and TTS conversion in parallel
-    with concurrent.futures.ThreadPoolExecutor() as executor:
-        # Start the bot response generation in parallel
-        bot_future = executor.submit(generate_bot_response, history, choice, retrieval_mode, model_choice)
-        # Wait for the text generation to start
-        for history_chunk in bot_future.result():
-            response = history_chunk[-1][1]  # Update the response with the current state
-            yield history_chunk, None  # Stream the text output as it's generated
-            # Start streaming Parler TTS as text is being generated
-            if tts_choice == "Beta":  # Parler TTS
-                parler_tts_future = executor.submit(generate_audio_parler_tts, response, callback=lambda audio_chunk: yield_audio(audio_chunk))
-                parler_tts_future.result()
-        # Once text is fully generated, start the Eleven Labs TTS if chosen
-        if tts_choice == "Alpha":  # Eleven Labs
-            tts_future = executor.submit(generate_tts_response, response, tts_choice)
-            audio_path = tts_future.result()
-            yield history, audio_path
 def yield_audio(audio_chunk):
     """ Stream audio in chunks to the output """
@@ -476,6 +475,11 @@ def yield_audio(audio_chunk):
     write_wav(temp_audio_path, 16000, audio_chunk.astype(np.float32))
     return temp_audio_path
@@ -1028,6 +1032,7 @@ def generate_audio_elevenlabs(text):
 #     return combined_audio_path
 import concurrent.futures
 import tempfile
 import os
@@ -1044,7 +1049,8 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
 repo_id = "parler-tts/parler-tts-mini-v1"
-def generate_audio_parler_tts(text, callback=None):
     description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
     chunk_size_in_s = 3.0  # Set to 3-second chunks
@@ -1083,6 +1089,7 @@ def generate_audio_parler_tts(text, callback=None):
     audio_segments = []
     for (sampling_rate, audio_chunk) in generate(text, description, chunk_size_in_s):
         audio_segments.append(audio_chunk)
     # Combine all the audio chunks into one audio file after streaming
     combined_audio = np.concatenate(audio_segments)

 import concurrent.futures
+import asyncio
+async def bot(history, choice, tts_choice, retrieval_mode, model_choice):
     # Initialize an empty response
     response = ""
+    # Start generating the text and audio in parallel
+    text_future = asyncio.create_task(generate_text(history, choice, retrieval_mode, model_choice))
+    audio_future = None
+    while not text_future.done():
+        # Stream the text as it's being generated
+        chunk = await text_future
+        response += chunk
+        history[-1][1] += chunk
+        yield history, None  # Stream the text output as it's generated
+        # Start generating Parler TTS if selected
+        if tts_choice == "Beta" and audio_future is None:
+            audio_future = asyncio.create_task(generate_audio_parler_tts(response, callback=lambda audio_chunk: yield_audio(audio_chunk)))
+    # Wait for the audio to finish streaming
+    if audio_future is not None:
+        await audio_future
 def yield_audio(audio_chunk):
     """ Stream audio in chunks to the output """
     write_wav(temp_audio_path, 16000, audio_chunk.astype(np.float32))
     return temp_audio_path
+async def generate_text(history, choice, retrieval_mode, model_choice):
+    # Simulate text generation chunk by chunk
+    for char in "Generating text response...":
+        await asyncio.sleep(0.05)  # Simulate time delay between character generation
+        yield char
 #     return combined_audio_path
+import asyncio
 import concurrent.futures
 import tempfile
 import os
 repo_id = "parler-tts/parler-tts-mini-v1"
+# Async function to stream Parler TTS in chunks
+async def generate_audio_parler_tts(text, callback=None):
     description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
     chunk_size_in_s = 3.0  # Set to 3-second chunks
     audio_segments = []
     for (sampling_rate, audio_chunk) in generate(text, description, chunk_size_in_s):
         audio_segments.append(audio_chunk)
+        await asyncio.sleep(0)  # Allow other tasks to run
     # Combine all the audio chunks into one audio file after streaming
     combined_audio = np.concatenate(audio_segments)