Update app.py
app.py
CHANGED
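Summary of the change (as read from the hunks below): the previous bot() and generate_audio_parler_tts() implementations are kept as commented-out blocks; bot() now yields a third value (a gr.update carrying the current response text) alongside the chat history and audio path; Parler-TTS chunk synthesis moves from a sequential loop to a ThreadPoolExecutor via a new process_chunk helper; and the Gradio Blocks UI gains follow-up question handling (generate_follow_up_buttons, handle_follow_up_choice, and a hidden follow_up_buttons group wired to the chatbot).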
@@ -275,26 +275,46 @@ def generate_answer(message, choice):
    addresses = extract_addresses(response['output'])
    return response['output'], addresses

+# def bot(history, choice, tts_model):
+#     if not history:
+#         return history
+#     response, addresses = generate_answer(history[-1][0], choice)
+#     history[-1][1] = ""
+
+#     # Generate audio for the entire response in a separate thread
+#     with concurrent.futures.ThreadPoolExecutor() as executor:
+#         if tts_model == "ElevenLabs":
+#             audio_future = executor.submit(generate_audio_elevenlabs, response)
+#         else:
+#             audio_future = executor.submit(generate_audio_parler_tts, response)
+
+#     for character in response:
+#         history[-1][1] += character
+#         time.sleep(0.05)  # Adjust the speed of text appearance
+#         yield history, None
+
+#     audio_path = audio_future.result()
+#     yield history, audio_path
+
def bot(history, choice, tts_model):
    if not history:
        return history
    response, addresses = generate_answer(history[-1][0], choice)
    history[-1][1] = ""
-
-    # Generate audio for the entire response in a separate thread
+
    with concurrent.futures.ThreadPoolExecutor() as executor:
        if tts_model == "ElevenLabs":
            audio_future = executor.submit(generate_audio_elevenlabs, response)
        else:
            audio_future = executor.submit(generate_audio_parler_tts, response)
-
+
    for character in response:
        history[-1][1] += character
-        time.sleep(0.05)  # Adjust the speed of text appearance
-        yield history, None
-
+        time.sleep(0.05)
+        yield history, None, gr.update(visible=True, value=history[-1][1])
+
    audio_path = audio_future.result()
-    yield history, audio_path
+    yield history, audio_path, gr.update(visible=True, value=history[-1][1])

def add_message(history, message):
    history.append((message, None))
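Because the active bot() is a generator that now yields three values per step, any event that calls it has to bind three output components; the last hunk does this by listing gr.Audio(...) and a gr.Button(value="Follow-up Questions") alongside the chatbot. A minimal sketch of that contract, with hypothetical component names rather than the app's full wiring:

import gradio as gr

def bot_sketch(history):
    history[-1][1] = "streamed text"
    # one value per output component: (chatbot, audio, follow-up control)
    yield history, None, gr.update(visible=True)

with gr.Blocks() as demo:
    chatbot = gr.Chatbot([])
    chat_input = gr.Textbox()
    audio_out = gr.Audio(autoplay=True)
    follow_up = gr.Button("Follow-up Questions", visible=False)
    chat_input.submit(lambda m, h: h + [(m, None)], [chat_input, chatbot], [chatbot]) \
              .then(bot_sketch, [chatbot], [chatbot, audio_out, follow_up])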
@@ -567,9 +587,65 @@ def generate_audio_elevenlabs(text):
# logging.debug(f"Audio saved to {temp_audio_path}")
# return temp_audio_path

+# def generate_audio_parler_tts(text, chunk_size=200):
+#     def split_text(text, chunk_size):
+#         # Split text into chunks of the specified size
+#         words = text.split()
+#         chunks = []
+#         current_chunk = []
+#         current_length = 0
+
+#         for word in words:
+#             if current_length + len(word) + 1 > chunk_size:
+#                 chunks.append(" ".join(current_chunk))
+#                 current_chunk = [word]
+#                 current_length = len(word) + 1
+#             else:
+#                 current_chunk.append(word)
+#                 current_length += len(word) + 1
+
+#         if current_chunk:
+#             chunks.append(" ".join(current_chunk))
+
+#         return chunks
+
+#     model_id = 'parler-tts/parler_tts_mini_v0.1'
+#     device = "cuda:0" if torch.cuda.is_available() else "cpu"
+#     try:
+#         model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
+#     except torch.cuda.OutOfMemoryError:
+#         print("CUDA out of memory. Switching to CPU.")
+#         device = "cpu"
+#         model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
+#     tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+#     description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
+
+#     input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
+#     chunks = split_text(text, chunk_size)
+#     audio_arrs = []
+
+#     for chunk in chunks:
+#         prompt_input_ids = tokenizer(chunk, return_tensors="pt").input_ids.to(device)
+#         generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
+#         audio_arr = generation.cpu().numpy().squeeze()
+#         audio_arrs.append(audio_arr)
+
+#     # Concatenate all audio arrays into a single array
+#     concatenated_audio = np.concatenate(audio_arrs)
+
+#     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+#         sf.write(f.name, concatenated_audio, model.config.sampling_rate)
+#         temp_audio_path = f.name
+
+#     logging.debug(f"Audio saved to {temp_audio_path}")
+#     return temp_audio_path
+
+
+import concurrent.futures
+
def generate_audio_parler_tts(text, chunk_size=200):
    def split_text(text, chunk_size):
-        # Split text into chunks of the specified size
        words = text.split()
        chunks = []
        current_chunk = []
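For reference, the chunking helper (visible in full in the commented-out copy above and kept in the active function) breaks the text on word boundaries once the running character count would exceed chunk_size. A self-contained illustration of that behaviour, with split_text copied from the code above:

def split_text(text, chunk_size):
    words = text.split()
    chunks = []
    current_chunk = []
    current_length = 0

    for word in words:
        if current_length + len(word) + 1 > chunk_size:
            chunks.append(" ".join(current_chunk))
            current_chunk = [word]
            current_length = len(word) + 1
        else:
            current_chunk.append(word)
            current_length += len(word) + 1

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks

# Words are never split; a new chunk starts when the next word would overflow.
assert split_text("hello world foo bar", 12) == ["hello world", "foo bar"]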
@@ -588,7 +664,13 @@ def generate_audio_parler_tts(text, chunk_size=200):
            chunks.append(" ".join(current_chunk))

        return chunks
-
+
+    def process_chunk(chunk):
+        prompt_input_ids = tokenizer(chunk, return_tensors="pt").input_ids.to(device)
+        generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
+        audio_arr = generation.cpu().numpy().squeeze()
+        return audio_arr
+
    model_id = 'parler-tts/parler_tts_mini_v0.1'
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    try:
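Note that process_chunk closes over tokenizer, model, input_ids and device, which are only assigned later in generate_audio_parler_tts. That is fine in Python: free variables of a nested function are looked up when it runs, not when it is defined, and the executor only calls process_chunk after those names are bound. A tiny illustration of that rule (not app code):

def outer():
    def inner():
        return value * 2   # 'value' is resolved when inner() runs
    value = 21             # bound after 'inner' is defined
    return inner()

print(outer())  # 42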
@@ -603,13 +685,11 @@ def generate_audio_parler_tts(text, chunk_size=200):

    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
    chunks = split_text(text, chunk_size)
-    audio_arrs = []
-
-    for chunk in chunks:
-        prompt_input_ids = tokenizer(chunk, return_tensors="pt").input_ids.to(device)
-        generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
-        audio_arr = generation.cpu().numpy().squeeze()
-        audio_arrs.append(audio_arr)
+
+    # Process chunks in parallel
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        futures = [executor.submit(process_chunk, chunk) for chunk in chunks]
+        audio_arrs = [future.result() for future in concurrent.futures.as_completed(futures)]

    # Concatenate all audio arrays into a single array
    concatenated_audio = np.concatenate(audio_arrs)
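One detail worth noting: concurrent.futures.as_completed yields futures in completion order, not submission order, so with this hunk the per-chunk audio arrays can be concatenated out of their original text order. If the chunks should play back in order, collecting results in submission order preserves it; a sketch of that variant, reusing the same process_chunk and chunks as in the function above:

import concurrent.futures

with concurrent.futures.ThreadPoolExecutor() as executor:
    # executor.map returns results in the order the chunks were submitted
    audio_arrs = list(executor.map(process_chunk, chunks))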
@@ -650,47 +730,97 @@ def update_images():
    image_3 = generate_image(hardcoded_prompt_3)
    return image_1, image_2, image_3

-with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
+# with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:

+#     with gr.Row():
+#         with gr.Column():
+#             state = gr.State()
+
+#             chatbot = gr.Chatbot([], elem_id="RADAR:Channel 94.1", bubble_full_width=False)
+#             choice = gr.Radio(label="Select Style", choices=["Details", "Conversational"], value="Conversational")
+#             tts_choice = gr.Radio(label="Select TTS Model", choices=["ElevenLabs", "Parler TTS"], value="Parler TTS")
+
+#             gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
+#             chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
+#             chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
+#             bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
+#             bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
+#             chatbot.like(print_like_dislike, None, None)
+#             clear_button = gr.Button("Clear")
+#             clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
+
+
+#             audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
+#             audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="SAMLOne_real_time")
+
+#             # gr.Markdown("<h1 style='color: red;'>Map</h1>", elem_id="location-markdown")
+#             # location_output = gr.HTML()
+#             # bot_msg.then(show_map_if_details, [chatbot, choice], [location_output, location_output])
+
+#         # with gr.Column():
+#         #     weather_output = gr.HTML(value=fetch_local_weather())
+#         #     news_output = gr.HTML(value=fetch_local_news())
+#         #     news_output = gr.HTML(value=fetch_local_events())
+
+#         with gr.Column():
+
+#             image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
+#             image_output_2 = gr.Image(value=generate_image(hardcoded_prompt_2), width=400, height=400)
+#             image_output_3 = gr.Image(value=generate_image(hardcoded_prompt_3), width=400, height=400)
+
+
+#             refresh_button = gr.Button("Refresh Images")
+#             refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3])
+
+# demo.queue()
+# demo.launch(share=True)
+
+def generate_follow_up_buttons(response):
+    return gr.update(visible=True), gr.update(value=response)
+
+def handle_follow_up_choice(choice, history):
+    follow_up_responses = {
+        "Question 1": "This is the response to follow-up question 1.",
+        "Question 2": "This is the response to follow-up question 2."
+    }
+    follow_up_response = follow_up_responses.get(choice, "Sorry, I didn't understand that choice.")
+    history.append((choice, follow_up_response))
+    return history, gr.update(visible=False)
+
+
+
+with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
+
    with gr.Row():
        with gr.Column():
            state = gr.State()
-
            chatbot = gr.Chatbot([], elem_id="RADAR:Channel 94.1", bubble_full_width=False)
            choice = gr.Radio(label="Select Style", choices=["Details", "Conversational"], value="Conversational")
            tts_choice = gr.Radio(label="Select TTS Model", choices=["ElevenLabs", "Parler TTS"], value="Parler TTS")
-
+
            gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
            chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
            chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
-            bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
+            bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True), gr.Button(value="Follow-up Questions")])
            bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
+
+            follow_up_buttons = gr.ButtonGroup(choices=["Question 1", "Question 2"], visible=False)
+            follow_up_buttons.click(handle_follow_up_choice, inputs=[follow_up_buttons, chatbot], outputs=[chatbot, follow_up_buttons])
+
            chatbot.like(print_like_dislike, None, None)
            clear_button = gr.Button("Clear")
            clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
-
-
+
            audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
            audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="SAMLOne_real_time")

-            # gr.Markdown("<h1 style='color: red;'>Map</h1>", elem_id="location-markdown")
-            # location_output = gr.HTML()
-            # bot_msg.then(show_map_if_details, [chatbot, choice], [location_output, location_output])
-
-        # with gr.Column():
-        #     weather_output = gr.HTML(value=fetch_local_weather())
-        #     news_output = gr.HTML(value=fetch_local_news())
-        #     news_output = gr.HTML(value=fetch_local_events())
-
        with gr.Column():
-
            image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
            image_output_2 = gr.Image(value=generate_image(hardcoded_prompt_2), width=400, height=400)
            image_output_3 = gr.Image(value=generate_image(hardcoded_prompt_3), width=400, height=400)

-
            refresh_button = gr.Button("Refresh Images")
            refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3])
-
+
demo.queue()
demo.launch(share=True)
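The new wiring assumes a gr.ButtonGroup component for the follow-up choices. If that component is not available in the installed Gradio version, a hidden gr.Radio gives a similar pick-a-question flow with the same handle_follow_up_choice handler; a hedged sketch meant to sit in the same Blocks context, reusing the chatbot defined above:

follow_up_choice = gr.Radio(choices=["Question 1", "Question 2"],
                            label="Follow-up Questions", visible=False)
follow_up_choice.change(handle_follow_up_choice,
                        inputs=[follow_up_choice, chatbot],
                        outputs=[chatbot, follow_up_choice])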