Pijush2023 committed
Commit f8afb87 · verified · 1 Parent(s): 311f14d

Update app.py

Files changed (1):
  app.py (+48 −212)

app.py CHANGED
@@ -356,34 +356,15 @@ Sure! Here's the information you requested:
  """
 
 
- # def generate_bot_response(history, choice, retrieval_mode, model_choice):
- #     if not history:
- #         return
-
- #     # Select the model
- #     # selected_model = chat_model if model_choice == "LM-1" else phi_pipe
- #     selected_model = chat_model if model_choice == "LM-1" else (chat_model1 if model_choice == "LM-3" else phi_pipe)
-
- #     response, addresses = generate_answer(history[-1][0], choice, retrieval_mode, selected_model)
- #     history[-1][1] = ""
-
- #     for character in response:
- #         history[-1][1] += character
- #         yield history  # Stream each character as it is generated
- #         time.sleep(0.05)  # Add a slight delay to simulate streaming
-
- #     yield history  # Final yield with the complete response
-
-
- # Modified bot function to separate chatbot response and TTS generation
  def generate_bot_response(history, choice, retrieval_mode, model_choice):
      if not history:
          return
 
      # Select the model
+     # selected_model = chat_model if model_choice == "LM-1" else phi_pipe
      selected_model = chat_model if model_choice == "LM-1" else (chat_model1 if model_choice == "LM-3" else phi_pipe)
 
+
      response, addresses = generate_answer(history[-1][0], choice, retrieval_mode, selected_model)
      history[-1][1] = ""
 
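Note: the nested conditional kept above picks one of three backends by name. The same dispatch can be written as a dictionary lookup; a minimal sketch, assuming the chat_model, chat_model1, and phi_pipe objects defined elsewhere in app.py:

    def select_model(model_choice):
        # "LM-1" and "LM-3" map to the chat models; any other choice falls
        # back to phi_pipe, matching the else branch of the conditional above.
        registry = {"LM-1": chat_model, "LM-3": chat_model1}
        return registry.get(model_choice, phi_pipe)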
@@ -416,70 +397,34 @@ def generate_tts_response(response, tts_choice):
 
 
 
- # import concurrent.futures
- # # Existing bot function with concurrent futures for parallel processing
- # def bot(history, choice, tts_choice, retrieval_mode, model_choice):
- #     # Initialize an empty response
- #     response = ""
-
- #     # Create a thread pool to handle both text generation and TTS conversion in parallel
- #     with concurrent.futures.ThreadPoolExecutor() as executor:
- #         # Start the bot response generation in parallel
- #         bot_future = executor.submit(generate_bot_response, history, choice, retrieval_mode, model_choice)
-
- #         # Wait for the text generation to start
- #         for history_chunk in bot_future.result():
- #             response = history_chunk[-1][1]  # Update the response with the current state
- #             yield history_chunk, None  # Stream the text output as it's generated
-
- #         # Once text is fully generated, start the TTS conversion
- #         tts_future = executor.submit(generate_tts_response, response, tts_choice)
-
- #         # Get the audio output after TTS is done
- #         audio_path = tts_future.result()
-
- #         # Stream the final text and audio output
- #         yield history, audio_path
-
-
  import concurrent.futures
 
- import asyncio
-
- async def bot(history, choice, tts_choice, retrieval_mode, model_choice):
-     response = ""
-
-     # Start generating text asynchronously
-     text_gen = generate_text(history, choice, retrieval_mode, model_choice)
-     audio_future = None
-
-     # Iterate over the text generator
-     async for chunk in text_gen:
-         response += chunk
-         history[-1][1] += chunk
-         yield history, None  # Stream the text output as it's generated
-
-     # Start generating Parler TTS if selected and not started already
-     if tts_choice == "Beta" and audio_future is None:
-         audio_future = asyncio.create_task(generate_audio_parler_tts(response, callback=lambda audio_chunk: yield_audio(audio_chunk)))
-
-     # Wait for the audio to finish streaming if it was started
-     if audio_future is not None:
-         await audio_future
-
- def yield_audio(audio_chunk):
-     """ Stream audio in chunks to the output """
-     temp_audio_path = os.path.join(tempfile.gettempdir(), f"parler_tts_chunk_{int(time.time())}.wav")
-     write_wav(temp_audio_path, 16000, audio_chunk.astype(np.float32))
-     return temp_audio_path
-
- # Text generator as an async generator
- async def generate_text(history, choice, retrieval_mode, model_choice):
-     # Simulate text generation chunk by chunk
-     text_to_generate = "Generating text response..."
-     for char in text_to_generate:
-         await asyncio.sleep(0.05)  # Simulate time delay between character generation
-         yield char  # Yield each character as it's generated
+ # Existing bot function with concurrent futures for parallel processing
+ def bot(history, choice, tts_choice, retrieval_mode, model_choice):
+     # Initialize an empty response
+     response = ""
+
+     # Create a thread pool to handle both text generation and TTS conversion in parallel
+     with concurrent.futures.ThreadPoolExecutor() as executor:
+         # Start the bot response generation in parallel
+         bot_future = executor.submit(generate_bot_response, history, choice, retrieval_mode, model_choice)
+
+         # Wait for the text generation to start
+         for history_chunk in bot_future.result():
+             response = history_chunk[-1][1]  # Update the response with the current state
+             yield history_chunk, None  # Stream the text output as it's generated
+
+         # Once text is fully generated, start the TTS conversion
+         tts_future = executor.submit(generate_tts_response, response, tts_choice)
+
+         # Get the audio output after TTS is done
+         audio_path = tts_future.result()
+
+         # Stream the final text and audio output
+         yield history, audio_path
 
 
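Note: the restored bot() submits a generator function to a ThreadPoolExecutor. One subtlety: submitting a generator function only constructs the generator, so bot_future.result() returns almost immediately and the streaming loop itself runs in the caller's thread; only the TTS call does real work in the pool. A self-contained sketch of the pattern, with char_stream and fake_tts as hypothetical stand-ins for generate_bot_response and generate_tts_response:

    import concurrent.futures
    import time

    def char_stream(text):
        # Stand-in for generate_bot_response: yields the growing response.
        out = ""
        for ch in text:
            out += ch
            time.sleep(0.01)
            yield out

    def fake_tts(text):
        # Stand-in for generate_tts_response: pretends to render audio.
        time.sleep(0.1)
        return f"/tmp/audio_{len(text)}.wav"

    def bot_demo(prompt):
        with concurrent.futures.ThreadPoolExecutor() as executor:
            gen = executor.submit(char_stream, prompt).result()  # returns the generator at once
            response = ""
            for partial in gen:            # iteration happens in this thread
                response = partial
                yield partial, None        # stream text, no audio yet
            audio_path = executor.submit(fake_tts, response).result()
            yield response, audio_path     # final text plus audio

    for text, audio in bot_demo("hello"):
        print(text, audio)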
@@ -507,21 +452,11 @@ def generate_bot_response(history, choice, retrieval_mode, model_choice):
 
 
 
- # def generate_audio_after_text(response, tts_choice):
- #     # Generate TTS audio after text response is completed
- #     with concurrent.futures.ThreadPoolExecutor() as executor:
- #         tts_future = executor.submit(generate_tts_response, response, tts_choice)
- #         audio_path = tts_future.result()
- #     return audio_path
-
  def generate_audio_after_text(response, tts_choice):
      # Generate TTS audio after text response is completed
      with concurrent.futures.ThreadPoolExecutor() as executor:
-         if tts_choice == "Alpha":
-             audio_future = executor.submit(generate_audio_elevenlabs, response)
-         elif tts_choice == "Beta":
-             audio_future = executor.submit(generate_audio_parler_tts, response)  # Use the updated Parler TTS generator
-         audio_path = audio_future.result()
+         tts_future = executor.submit(generate_tts_response, response, tts_choice)
+         audio_path = tts_future.result()
      return audio_path
 
  import re
@@ -766,9 +701,9 @@ def generate_image(prompt):
      ).images[0]
      return image
 
- hardcoded_prompt_1 = "Give a high quality photograph of a great looking red 2026 Toyota coupe against a skyline setting in the night, michael mann style in omaha enticing the consumer to buy this product"
- hardcoded_prompt_2 = "A vibrant and dynamic football game scene in the style of Peter Paul Rubens, showcasing the intense match between Alabama and Nebraska. The players are depicted with the dramatic, muscular physiques and expressive faces typical of Rubens' style. The Alabama team is wearing their iconic crimson and white uniforms, while the Nebraska team is in their classic red and white attire. The scene is filled with action, with players in mid-motion, tackling, running, and catching the ball. The background features a grand stadium filled with cheering fans, banners, and the natural landscape in the distance. The colors are rich and vibrant, with a strong use of light and shadow to create depth and drama. The overall atmosphere captures the intensity and excitement of the game, infused with the grandeur and dynamism characteristic of Rubens' work."
- hardcoded_prompt_3 = "Create a high-energy scene of a DJ performing on a large stage with vibrant lights, colorful lasers, a lively dancing crowd, and various electronic equipment in the background."
+ hardcoded_prompt_1 = "A high quality cinematic image for Toyota Truck in Birmingham skyline shot in the style of Michael Mann"
+ hardcoded_prompt_2 = "A high quality cinematic image for Alabama Quarterback close up emotional shot in the style of Michael Mann"
+ hardcoded_prompt_3 = "A high quality cinematic image for Taylor Swift concert in Birmingham skyline in the style of Michael Mann"
 
  def update_images():
      image_1 = generate_image(hardcoded_prompt_1)
@@ -960,79 +895,6 @@ def generate_audio_elevenlabs(text):
 
  # chunking audio and then Process
 
- # import concurrent.futures
- # import tempfile
- # import os
- # import numpy as np
- # import logging
- # from queue import Queue
- # from threading import Thread
- # from scipy.io.wavfile import write as write_wav
- # from parler_tts import ParlerTTSForConditionalGeneration, ParlerTTSStreamer
- # from transformers import AutoTokenizer
-
- # # Ensure your device is set to CUDA
- # device = "cuda:0" if torch.cuda.is_available() else "cpu"
-
- # repo_id = "parler-tts/parler-tts-mini-v1"
-
- # def generate_audio_parler_tts(text):
- #     description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
- #     chunk_size_in_s = 0.5
-
- #     # Initialize the tokenizer and model
- #     parler_tokenizer = AutoTokenizer.from_pretrained(repo_id)
- #     parler_model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
- #     sampling_rate = parler_model.audio_encoder.config.sampling_rate
- #     frame_rate = parler_model.audio_encoder.config.frame_rate
-
- #     def generate(text, description, play_steps_in_s=0.5):
- #         play_steps = int(frame_rate * play_steps_in_s)
- #         streamer = ParlerTTSStreamer(parler_model, device=device, play_steps=play_steps)
-
- #         inputs = parler_tokenizer(description, return_tensors="pt").to(device)
- #         prompt = parler_tokenizer(text, return_tensors="pt").to(device)
-
- #         generation_kwargs = dict(
- #             input_ids=inputs.input_ids,
- #             prompt_input_ids=prompt.input_ids,
- #             attention_mask=inputs.attention_mask,
- #             prompt_attention_mask=prompt.attention_mask,
- #             streamer=streamer,
- #             do_sample=True,
- #             temperature=1.0,
- #             min_new_tokens=10,
- #         )
-
- #         thread = Thread(target=parler_model.generate, kwargs=generation_kwargs)
- #         thread.start()
-
- #         for new_audio in streamer:
- #             if new_audio.shape[0] == 0:
- #                 break
- #             # Save or process each audio chunk as it is generated
- #             yield sampling_rate, new_audio
-
- #     audio_segments = []
- #     for (sampling_rate, audio_chunk) in generate(text, description, chunk_size_in_s):
- #         audio_segments.append(audio_chunk)
-
- #         temp_audio_path = os.path.join(tempfile.gettempdir(), f"parler_tts_audio_chunk_{len(audio_segments)}.wav")
- #         write_wav(temp_audio_path, sampling_rate, audio_chunk.astype(np.float32))
- #         logging.debug(f"Saved chunk to {temp_audio_path}")
-
-
- #     # Combine all the audio chunks into one audio file
- #     combined_audio = np.concatenate(audio_segments)
- #     combined_audio_path = os.path.join(tempfile.gettempdir(), "parler_tts_combined_audio_stream.wav")
-
- #     write_wav(combined_audio_path, sampling_rate, combined_audio.astype(np.float32))
-
- #     logging.debug(f"Combined audio saved to {combined_audio_path}")
- #     return combined_audio_path
-
-
- import asyncio
  import concurrent.futures
  import tempfile
  import os
@@ -1049,10 +911,9 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
  repo_id = "parler-tts/parler-tts-mini-v1"
 
- # Async function to stream Parler TTS in chunks
- async def generate_audio_parler_tts(text, callback=None):
+ def generate_audio_parler_tts(text):
      description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
-     chunk_size_in_s = 3.0  # Set to 3-second chunks
+     chunk_size_in_s = 0.5
 
      # Initialize the tokenizer and model
      parler_tokenizer = AutoTokenizer.from_pretrained(repo_id)
@@ -1060,7 +921,7 @@ async def generate_audio_parler_tts(text, callback=None):
      sampling_rate = parler_model.audio_encoder.config.sampling_rate
      frame_rate = parler_model.audio_encoder.config.frame_rate
 
-     def generate(text, description, play_steps_in_s=3.0):
+     def generate(text, description, play_steps_in_s=0.5):
          play_steps = int(frame_rate * play_steps_in_s)
          streamer = ParlerTTSStreamer(parler_model, device=device, play_steps=play_steps)
 
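Note: play_steps converts the chunk length from seconds into codec frames, so the 3.0 → 0.5 change above shrinks each streamed chunk by a factor of six. Worked through with an assumed frame_rate of 86 frames per second (the real value is read from parler_model.audio_encoder.config at runtime):

    frame_rate = 86                        # assumption for illustration only
    play_steps = int(frame_rate * 3.0)     # 258 frames per chunk (old)
    play_steps = int(frame_rate * 0.5)     # 43 frames per chunk (new)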
@@ -1070,6 +931,8 @@ async def generate_audio_parler_tts(text, callback=None):
          generation_kwargs = dict(
              input_ids=inputs.input_ids,
              prompt_input_ids=prompt.input_ids,
+             attention_mask=inputs.attention_mask,
+             prompt_attention_mask=prompt.attention_mask,
              streamer=streamer,
              do_sample=True,
              temperature=1.0,
@@ -1082,26 +945,28 @@ async def generate_audio_parler_tts(text, callback=None):
          for new_audio in streamer:
              if new_audio.shape[0] == 0:
                  break
-             if callback:
-                 callback(new_audio)  # Send the chunk to the callback function for streaming
+             # Save or process each audio chunk as it is generated
              yield sampling_rate, new_audio
 
      audio_segments = []
      for (sampling_rate, audio_chunk) in generate(text, description, chunk_size_in_s):
          audio_segments.append(audio_chunk)
-         await asyncio.sleep(0)  # Allow other tasks to run
 
-     # Combine all the audio chunks into one audio file after streaming
+         temp_audio_path = os.path.join(tempfile.gettempdir(), f"parler_tts_audio_chunk_{len(audio_segments)}.wav")
+         write_wav(temp_audio_path, sampling_rate, audio_chunk.astype(np.float32))
+         logging.debug(f"Saved chunk to {temp_audio_path}")
+
+
+     # Combine all the audio chunks into one audio file
      combined_audio = np.concatenate(audio_segments)
      combined_audio_path = os.path.join(tempfile.gettempdir(), "parler_tts_combined_audio_stream.wav")
+
      write_wav(combined_audio_path, sampling_rate, combined_audio.astype(np.float32))
 
      logging.debug(f"Combined audio saved to {combined_audio_path}")
      return combined_audio_path
 
 
-
-
  def fetch_local_events():
      api_key = os.environ['SERP_API']
      url = f'https://serpapi.com/search.json?engine=google_events&q=Events+in+Birmingham&hl=en&gl=us&api_key={api_key}'
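Note: the loop above accumulates streamed chunks, writes a debug file per chunk, then concatenates everything into a single WAV. The same accumulate-and-concatenate pattern in isolation, with synthetic sine-wave chunks standing in for the streamer output:

    import os
    import tempfile

    import numpy as np
    from scipy.io.wavfile import write as write_wav

    sampling_rate = 16000
    t = np.linspace(0, 0.5, sampling_rate // 2, endpoint=False)
    chunks = [np.sin(2 * np.pi * f * t).astype(np.float32) for f in (220, 440, 880)]

    audio_segments = []
    for i, chunk in enumerate(chunks, start=1):
        audio_segments.append(chunk)
        # Per-chunk debug file, as in generate_audio_parler_tts.
        write_wav(os.path.join(tempfile.gettempdir(), f"demo_chunk_{i}.wav"), sampling_rate, chunk)

    combined = np.concatenate(audio_segments)  # join chunks along the time axis
    combined_path = os.path.join(tempfile.gettempdir(), "demo_combined.wav")
    write_wav(combined_path, sampling_rate, combined)
    print(combined_path)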
@@ -1527,25 +1392,13 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
      #     .then(fn=clear_textbox, inputs=[], outputs=[chat_input], api_name="api_clear_textbox")
      # )
 
-     # retriever_sequence = (
-     #     retriever_button.click(fn=stop_audio, inputs=[], outputs=[audio_output], api_name="api_stop_audio_recording")
-     #     .then(fn=add_message, inputs=[chatbot, chat_input], outputs=[chatbot, chat_input], api_name="api_addprompt_chathistory")
-     #     # First, generate the bot response
-     #     .then(fn=generate_bot_response, inputs=[chatbot, choice, retrieval_mode, model_choice], outputs=[chatbot], api_name="api_generate_bot_response")
-     #     # Then, generate the TTS response based on the bot's response
-     #     .then(fn=generate_tts_response, inputs=[chatbot, tts_choice], outputs=[audio_output], api_name="api_generate_tts_response")
-     #     .then(fn=show_map_if_details, inputs=[chatbot, choice], outputs=[location_output, location_output], api_name="api_show_map_details")
-     #     .then(fn=clear_textbox, inputs=[], outputs=[chat_input], api_name="api_clear_textbox")
-     # )
-
-     # Gradio bot interaction with audio streaming
      retriever_sequence = (
          retriever_button.click(fn=stop_audio, inputs=[], outputs=[audio_output], api_name="api_stop_audio_recording")
          .then(fn=add_message, inputs=[chatbot, chat_input], outputs=[chatbot, chat_input], api_name="api_addprompt_chathistory")
          # First, generate the bot response
          .then(fn=generate_bot_response, inputs=[chatbot, choice, retrieval_mode, model_choice], outputs=[chatbot], api_name="api_generate_bot_response")
-         # Generate the TTS response based on the bot's response concurrently
-         .then(fn=bot, inputs=[chatbot, choice, tts_choice, retrieval_mode, model_choice], outputs=[chatbot, audio_output], api_name="api_generate_tts_response")
+         # Then, generate the TTS response based on the bot's response
+         .then(fn=generate_tts_response, inputs=[chatbot, tts_choice], outputs=[audio_output], api_name="api_generate_tts_response")
          .then(fn=show_map_if_details, inputs=[chatbot, choice], outputs=[location_output, location_output], api_name="api_show_map_details")
          .then(fn=clear_textbox, inputs=[], outputs=[chat_input], api_name="api_clear_textbox")
      )
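Note: the sequence above relies on Gradio's event chaining: each .then() step starts only after the previous one finishes, which is why the text response lands in the chatbot before TTS begins. A minimal sketch of the same chaining, with make_text and make_audio as hypothetical stand-ins for the app's handlers:

    import gradio as gr

    def make_text(name):
        # Stand-in for the bot-response step.
        return f"Hello, {name}!"

    def make_audio(text):
        # Stand-in for generate_tts_response; would return an audio file path.
        return None

    with gr.Blocks() as demo:
        name = gr.Textbox(label="Name")
        reply = gr.Textbox(label="Reply")
        audio = gr.Audio(label="TTS")
        # The .then() step runs only after the first handler completes.
        name.submit(fn=make_text, inputs=[name], outputs=[reply]) \
            .then(fn=make_audio, inputs=[reply], outputs=[audio])

    demo.launch()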
@@ -1564,25 +1417,14 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
      #     fn=clear_textbox, inputs=[], outputs=[chat_input], api_name="api_clear_textbox"
      # )
 
-     # chat_input.submit(fn=stop_audio, inputs=[], outputs=[audio_output], api_name="api_stop_audio_recording").then(
-     #     fn=add_message, inputs=[chatbot, chat_input], outputs=[chatbot, chat_input], api_name="api_addprompt_chathistory"
-     # ).then(
-     #     # First, generate the bot response
-     #     fn=generate_bot_response, inputs=[chatbot, choice, retrieval_mode, model_choice], outputs=[chatbot], api_name="api_generate_bot_response"
-     # ).then(
-     #     # Then, generate the TTS response based on the bot's response
-     #     fn=generate_tts_response, inputs=[chatbot, tts_choice], outputs=[audio_output], api_name="api_generate_tts_response"
-     # ).then(
-     #     fn=show_map_if_details, inputs=[chatbot, choice], outputs=[location_output, location_output], api_name="api_show_map_details"
-     # ).then(
-     #     fn=clear_textbox, inputs=[], outputs=[chat_input], api_name="api_clear_textbox"
-     # )
-
-     # The same logic for chat_input submission
      chat_input.submit(fn=stop_audio, inputs=[], outputs=[audio_output], api_name="api_stop_audio_recording").then(
          fn=add_message, inputs=[chatbot, chat_input], outputs=[chatbot, chat_input], api_name="api_addprompt_chathistory"
      ).then(
-         fn=bot, inputs=[chatbot, choice, tts_choice, retrieval_mode, model_choice], outputs=[chatbot, audio_output], api_name="api_generate_tts_response"
+         # First, generate the bot response
+         fn=generate_bot_response, inputs=[chatbot, choice, retrieval_mode, model_choice], outputs=[chatbot], api_name="api_generate_bot_response"
+     ).then(
+         # Then, generate the TTS response based on the bot's response
+         fn=generate_tts_response, inputs=[chatbot, tts_choice], outputs=[audio_output], api_name="api_generate_tts_response"
      ).then(
          fn=show_map_if_details, inputs=[chatbot, choice], outputs=[location_output, location_output], api_name="api_show_map_details"
      ).then(
@@ -1594,6 +1436,7 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
 
 
+
      audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy', every=0.1)
      audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="api_voice_to_text")
 
@@ -1614,11 +1457,4 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
      refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3], api_name="update_image")
 
      demo.queue()
-     demo.launch(show_error=True)
-
-
-
-
-
-
-
+     demo.launch(show_error=True)