Pijush2023 commited on
Commit
7f250f0
·
verified ·
1 Parent(s): 84d46a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -20
app.py CHANGED
@@ -444,31 +444,30 @@ def generate_tts_response(response, tts_choice):
444
 
445
  import concurrent.futures
446
 
447
- # Modified bot function to handle text and audio concurrently
448
- def bot(history, choice, tts_choice, retrieval_mode, model_choice):
 
449
  # Initialize an empty response
450
  response = ""
451
 
452
- # Create a thread pool to handle both text generation and TTS conversion in parallel
453
- with concurrent.futures.ThreadPoolExecutor() as executor:
454
- # Start the bot response generation in parallel
455
- bot_future = executor.submit(generate_bot_response, history, choice, retrieval_mode, model_choice)
456
 
457
- # Wait for the text generation to start
458
- for history_chunk in bot_future.result():
459
- response = history_chunk[-1][1] # Update the response with the current state
460
- yield history_chunk, None # Stream the text output as it's generated
 
 
461
 
462
- # Start streaming Parler TTS as text is being generated
463
- if tts_choice == "Beta": # Parler TTS
464
- parler_tts_future = executor.submit(generate_audio_parler_tts, response, callback=lambda audio_chunk: yield_audio(audio_chunk))
465
- parler_tts_future.result()
466
 
467
- # Once text is fully generated, start the Eleven Labs TTS if chosen
468
- if tts_choice == "Alpha": # Eleven Labs
469
- tts_future = executor.submit(generate_tts_response, response, tts_choice)
470
- audio_path = tts_future.result()
471
- yield history, audio_path
472
 
473
  def yield_audio(audio_chunk):
474
  """ Stream audio in chunks to the output """
@@ -476,6 +475,11 @@ def yield_audio(audio_chunk):
476
  write_wav(temp_audio_path, 16000, audio_chunk.astype(np.float32))
477
  return temp_audio_path
478
 
 
 
 
 
 
479
 
480
 
481
 
@@ -1028,6 +1032,7 @@ def generate_audio_elevenlabs(text):
1028
  # return combined_audio_path
1029
 
1030
 
 
1031
  import concurrent.futures
1032
  import tempfile
1033
  import os
@@ -1044,7 +1049,8 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
1044
 
1045
  repo_id = "parler-tts/parler-tts-mini-v1"
1046
 
1047
- def generate_audio_parler_tts(text, callback=None):
 
1048
  description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
1049
  chunk_size_in_s = 3.0 # Set to 3-second chunks
1050
 
@@ -1083,6 +1089,7 @@ def generate_audio_parler_tts(text, callback=None):
1083
  audio_segments = []
1084
  for (sampling_rate, audio_chunk) in generate(text, description, chunk_size_in_s):
1085
  audio_segments.append(audio_chunk)
 
1086
 
1087
  # Combine all the audio chunks into one audio file after streaming
1088
  combined_audio = np.concatenate(audio_segments)
 
444
 
445
  import concurrent.futures
446
 
447
+ import asyncio
448
+
449
async def bot(history, choice, tts_choice, retrieval_mode, model_choice):
    """Stream the bot's text response and, optionally, Parler TTS audio.

    An async generator: yields ``(history, None)`` after each text chunk so
    the UI can render the response as it is produced. When ``tts_choice``
    is ``"Beta"``, a Parler TTS task is started as soon as the first chunk
    arrives and is awaited before the generator finishes.

    Args:
        history: chat history; ``history[-1][1]`` is assumed to hold the
            in-progress assistant reply (TODO confirm against caller).
        choice, retrieval_mode, model_choice: forwarded to generate_text.
        tts_choice: "Beta" selects Parler TTS streaming.
    """
    response = ""
    audio_future = None

    # generate_text is an async *generator* (it contains `yield`), so it
    # must be consumed with `async for`. Wrapping it in
    # asyncio.create_task() — as the previous version did — raises
    # TypeError, and awaiting the task inside a `while not done()` loop
    # would not stream chunks anyway.
    async for chunk in generate_text(history, choice, retrieval_mode, model_choice):
        response += chunk
        history[-1][1] += chunk
        yield history, None  # stream the partial text to the UI

        # Kick off Parler TTS exactly once, as soon as text starts
        # arriving, so audio synthesis overlaps text generation.
        if tts_choice == "Beta" and audio_future is None:
            audio_future = asyncio.create_task(
                generate_audio_parler_tts(response, callback=yield_audio)
            )

    # Make sure the audio task has finished streaming before completing.
    if audio_future is not None:
        await audio_future
 
 
471
 
472
  def yield_audio(audio_chunk):
473
  """ Stream audio in chunks to the output """
 
475
  write_wav(temp_audio_path, 16000, audio_chunk.astype(np.float32))
476
  return temp_audio_path
477
 
478
async def generate_text(history, choice, retrieval_mode, model_choice):
    """Placeholder async generator for streamed text generation.

    Yields a canned message one character at a time, sleeping briefly
    between characters to mimic the latency of a real model. The
    arguments are accepted for interface compatibility but unused here.
    """
    placeholder = "Generating text response..."
    for ch in placeholder:
        # Simulated per-character generation delay.
        await asyncio.sleep(0.05)
        yield ch
483
 
484
 
485
 
 
1032
  # return combined_audio_path
1033
 
1034
 
1035
+ import asyncio
1036
  import concurrent.futures
1037
  import tempfile
1038
  import os
 
1049
 
1050
  repo_id = "parler-tts/parler-tts-mini-v1"
1051
 
1052
+ # Async function to stream Parler TTS in chunks
1053
+ async def generate_audio_parler_tts(text, callback=None):
1054
  description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
1055
  chunk_size_in_s = 3.0 # Set to 3-second chunks
1056
 
 
1089
  audio_segments = []
1090
  for (sampling_rate, audio_chunk) in generate(text, description, chunk_size_in_s):
1091
  audio_segments.append(audio_chunk)
1092
+ await asyncio.sleep(0) # Allow other tasks to run
1093
 
1094
  # Combine all the audio chunks into one audio file after streaming
1095
  combined_audio = np.concatenate(audio_segments)