Update app.py

app.py CHANGED
@@ -275,26 +275,46 @@ def generate_answer(message, choice):
     addresses = extract_addresses(response['output'])
     return response['output'], addresses
 
+# def bot(history, choice, tts_model):
+#     if not history:
+#         return history
+#     response, addresses = generate_answer(history[-1][0], choice)
+#     history[-1][1] = ""
+
+#     # Generate audio for the entire response in a separate thread
+#     with concurrent.futures.ThreadPoolExecutor() as executor:
+#         if tts_model == "ElevenLabs":
+#             audio_future = executor.submit(generate_audio_elevenlabs, response)
+#         else:
+#             audio_future = executor.submit(generate_audio_parler_tts, response)
+
+#     for character in response:
+#         history[-1][1] += character
+#         time.sleep(0.05)  # Adjust the speed of text appearance
+#         yield history, None
+
+#     audio_path = audio_future.result()
+#     yield history, audio_path
+
 def bot(history, choice, tts_model):
     if not history:
         return history
     response, addresses = generate_answer(history[-1][0], choice)
     history[-1][1] = ""
-
-    # Generate audio for the entire response in a separate thread
+
     with concurrent.futures.ThreadPoolExecutor() as executor:
         if tts_model == "ElevenLabs":
             audio_future = executor.submit(generate_audio_elevenlabs, response)
         else:
             audio_future = executor.submit(generate_audio_parler_tts, response)
 
     for character in response:
         history[-1][1] += character
         time.sleep(0.05)
-        yield history, None
+        yield history, None, gr.update(visible=True, value=history[-1][1])
 
     audio_path = audio_future.result()
-    yield history, audio_path
+    yield history, audio_path, gr.update(visible=True, value=history[-1][1])
 
 def add_message(history, message):
     history.append((message, None))
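The reworked bot handler streams the reply character by character while the selected TTS engine renders audio on a background thread, and each yield now carries a third value (a gr.update) so a follow-up control can be revealed alongside the text. A minimal, self-contained sketch of that generator pattern follows; fake_tts and the plain-string outputs are illustrative stand-ins, not part of app.py.

import concurrent.futures
import time

def fake_tts(text):
    # Stand-in for generate_audio_elevenlabs / generate_audio_parler_tts:
    # pretend to synthesize speech and hand back an audio file path.
    time.sleep(0.2)
    return "/tmp/answer.wav"

def stream_with_audio(response):
    # Start audio synthesis in the background so text streaming is not blocked.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        audio_future = executor.submit(fake_tts, response)

        shown = ""
        for character in response:
            shown += character
            time.sleep(0.01)      # pace the typing effect
            yield shown, None     # text so far, audio not ready yet

        # Final yield: full text plus the path produced by the background thread.
        yield shown, audio_future.result()

for text, audio in stream_with_audio("Hello from RADAR"):
    print(repr(text), audio)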
@@ -567,9 +587,65 @@ def generate_audio_elevenlabs(text):
     # logging.debug(f"Audio saved to {temp_audio_path}")
     # return temp_audio_path
 
+# def generate_audio_parler_tts(text, chunk_size=200):
+#     def split_text(text, chunk_size):
+#         # Split text into chunks of the specified size
+#         words = text.split()
+#         chunks = []
+#         current_chunk = []
+#         current_length = 0
+
+#         for word in words:
+#             if current_length + len(word) + 1 > chunk_size:
+#                 chunks.append(" ".join(current_chunk))
+#                 current_chunk = [word]
+#                 current_length = len(word) + 1
+#             else:
+#                 current_chunk.append(word)
+#                 current_length += len(word) + 1
+
+#         if current_chunk:
+#             chunks.append(" ".join(current_chunk))
+
+#         return chunks
+
+#     model_id = 'parler-tts/parler_tts_mini_v0.1'
+#     device = "cuda:0" if torch.cuda.is_available() else "cpu"
+#     try:
+#         model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
+#     except torch.cuda.OutOfMemoryError:
+#         print("CUDA out of memory. Switching to CPU.")
+#         device = "cpu"
+#         model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
+#     tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+#     description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
+
+#     input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
+#     chunks = split_text(text, chunk_size)
+#     audio_arrs = []
+
+#     for chunk in chunks:
+#         prompt_input_ids = tokenizer(chunk, return_tensors="pt").input_ids.to(device)
+#         generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
+#         audio_arr = generation.cpu().numpy().squeeze()
+#         audio_arrs.append(audio_arr)
+
+#     # Concatenate all audio arrays into a single array
+#     concatenated_audio = np.concatenate(audio_arrs)
+
+#     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+#         sf.write(f.name, concatenated_audio, model.config.sampling_rate)
+#         temp_audio_path = f.name
+
+#     logging.debug(f"Audio saved to {temp_audio_path}")
+#     return temp_audio_path
+
+
+import concurrent.futures
+
 def generate_audio_parler_tts(text, chunk_size=200):
     def split_text(text, chunk_size):
-        # Split text into chunks of the specified size
         words = text.split()
         chunks = []
         current_chunk = []
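For reference, split_text packs whole words greedily and starts a new chunk as soon as adding the next word (plus a space) would push the current chunk past chunk_size characters. Below is a standalone copy with a tiny example run; chunk_size=12 is chosen purely for illustration.

def split_text(text, chunk_size):
    words = text.split()
    chunks = []
    current_chunk = []
    current_length = 0

    for word in words:
        if current_length + len(word) + 1 > chunk_size:
            chunks.append(" ".join(current_chunk))
            current_chunk = [word]
            current_length = len(word) + 1
        else:
            current_chunk.append(word)
            current_length += len(word) + 1

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks

print(split_text("talk to RADAR channel ninety four", 12))
# -> ['talk to', 'RADAR', 'channel', 'ninety four']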
@@ -588,7 +664,13 @@ def generate_audio_parler_tts(text, chunk_size=200):
             chunks.append(" ".join(current_chunk))
 
         return chunks
 
+    def process_chunk(chunk):
+        prompt_input_ids = tokenizer(chunk, return_tensors="pt").input_ids.to(device)
+        generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
+        audio_arr = generation.cpu().numpy().squeeze()
+        return audio_arr
+
     model_id = 'parler-tts/parler_tts_mini_v0.1'
     device = "cuda:0" if torch.cuda.is_available() else "cpu"
     try:
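The try: that closes the hunk above guards the Parler-TTS model load; the commented-out copy earlier in the diff shows the full pattern, which tries the GPU first and drops to CPU on torch.cuda.OutOfMemoryError. A generic sketch of that fallback, where load_model is a hypothetical stand-in for ParlerTTSForConditionalGeneration.from_pretrained (torch.cuda.OutOfMemoryError requires a reasonably recent PyTorch):

import torch

def load_with_fallback(load_model, model_id):
    # Prefer the GPU; fall back to CPU if the model does not fit in VRAM.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    try:
        model = load_model(model_id).to(device)
    except torch.cuda.OutOfMemoryError:
        print("CUDA out of memory. Switching to CPU.")
        device = "cpu"
        model = load_model(model_id).to(device)
    return model, device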
@@ -603,13 +685,11 @@ def generate_audio_parler_tts(text, chunk_size=200):
 
     input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
     chunks = split_text(text, chunk_size)
-    audio_arrs = []
-
-    for chunk in chunks:
-        prompt_input_ids = tokenizer(chunk, return_tensors="pt").input_ids.to(device)
-        generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
-        audio_arr = generation.cpu().numpy().squeeze()
-        audio_arrs.append(audio_arr)
+
+    # Process chunks in parallel
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        futures = [executor.submit(process_chunk, chunk) for chunk in chunks]
+        audio_arrs = [future.result() for future in concurrent.futures.as_completed(futures)]
 
     # Concatenate all audio arrays into a single array
     concatenated_audio = np.concatenate(audio_arrs)
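One caveat in the parallel version above: concurrent.futures.as_completed yields futures in the order they finish, not the order the chunks were submitted, so the concatenated audio can come back with chunks reordered when generation times vary. An order-preserving sketch using executor.map; process_chunk here is a trivial stand-in for the real model call.

import concurrent.futures

import numpy as np

def process_chunk(chunk):
    # Stand-in for the Parler-TTS generation step: return a dummy waveform per chunk.
    return np.full(4, float(len(chunk)))

chunks = ["first chunk of text", "second chunk", "third"]

# executor.map returns results in submission order, so chunk order is preserved.
with concurrent.futures.ThreadPoolExecutor() as executor:
    audio_arrs = list(executor.map(process_chunk, chunks))

concatenated_audio = np.concatenate(audio_arrs)
print(concatenated_audio.shape)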
@@ -650,47 +730,97 @@ def update_images():
     image_3 = generate_image(hardcoded_prompt_3)
     return image_1, image_2, image_3
 
-with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
-
+# with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
+
+#     with gr.Row():
+#         with gr.Column():
+#             state = gr.State()
+
+#             chatbot = gr.Chatbot([], elem_id="RADAR:Channel 94.1", bubble_full_width=False)
+#             choice = gr.Radio(label="Select Style", choices=["Details", "Conversational"], value="Conversational")
+#             tts_choice = gr.Radio(label="Select TTS Model", choices=["ElevenLabs", "Parler TTS"], value="Parler TTS")
+
+#             gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
+#             chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
+#             chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
+#             bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
+#             bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
+#             chatbot.like(print_like_dislike, None, None)
+#             clear_button = gr.Button("Clear")
+#             clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
+
+
+#             audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
+#             audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="SAMLOne_real_time")
+
+#             # gr.Markdown("<h1 style='color: red;'>Map</h1>", elem_id="location-markdown")
+#             # location_output = gr.HTML()
+#             # bot_msg.then(show_map_if_details, [chatbot, choice], [location_output, location_output])
+
+#         # with gr.Column():
+#         #     weather_output = gr.HTML(value=fetch_local_weather())
+#         #     news_output = gr.HTML(value=fetch_local_news())
+#         #     news_output = gr.HTML(value=fetch_local_events())
+
+#         with gr.Column():
+
+#             image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
+#             image_output_2 = gr.Image(value=generate_image(hardcoded_prompt_2), width=400, height=400)
+#             image_output_3 = gr.Image(value=generate_image(hardcoded_prompt_3), width=400, height=400)
+
+
+#             refresh_button = gr.Button("Refresh Images")
+#             refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3])
+
+# demo.queue()
+# demo.launch(share=True)
+
+def generate_follow_up_buttons(response):
+    return gr.update(visible=True), gr.update(value=response)
+
+def handle_follow_up_choice(choice, history):
+    follow_up_responses = {
+        "Question 1": "This is the response to follow-up question 1.",
+        "Question 2": "This is the response to follow-up question 2."
+    }
+    follow_up_response = follow_up_responses.get(choice, "Sorry, I didn't understand that choice.")
+    history.append((choice, follow_up_response))
+    return history, gr.update(visible=False)
+
+
+
+with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
+
     with gr.Row():
         with gr.Column():
             state = gr.State()
-
             chatbot = gr.Chatbot([], elem_id="RADAR:Channel 94.1", bubble_full_width=False)
             choice = gr.Radio(label="Select Style", choices=["Details", "Conversational"], value="Conversational")
             tts_choice = gr.Radio(label="Select TTS Model", choices=["ElevenLabs", "Parler TTS"], value="Parler TTS")
-
+
             gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
             chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
             chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
-            bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
+            bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True), gr.Button(value="Follow-up Questions")])
             bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
+
+            follow_up_buttons = gr.ButtonGroup(choices=["Question 1", "Question 2"], visible=False)
+            follow_up_buttons.click(handle_follow_up_choice, inputs=[follow_up_buttons, chatbot], outputs=[chatbot, follow_up_buttons])
+
             chatbot.like(print_like_dislike, None, None)
             clear_button = gr.Button("Clear")
             clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
-
-
+
             audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
             audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="SAMLOne_real_time")
 
-            # gr.Markdown("<h1 style='color: red;'>Map</h1>", elem_id="location-markdown")
-            # location_output = gr.HTML()
-            # bot_msg.then(show_map_if_details, [chatbot, choice], [location_output, location_output])
-
-        # with gr.Column():
-        #     weather_output = gr.HTML(value=fetch_local_weather())
-        #     news_output = gr.HTML(value=fetch_local_news())
-        #     news_output = gr.HTML(value=fetch_local_events())
-
         with gr.Column():
-
             image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
             image_output_2 = gr.Image(value=generate_image(hardcoded_prompt_2), width=400, height=400)
             image_output_3 = gr.Image(value=generate_image(hardcoded_prompt_3), width=400, height=400)
 
-
             refresh_button = gr.Button("Refresh Images")
             refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3])
-
+
 demo.queue()
 demo.launch(share=True)
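A note on the follow-up wiring: gr.ButtonGroup is not a component the stock Gradio API is known to ship (gr.Button, gr.Radio and gr.CheckboxGroup are the usual choices), so the two follow_up_buttons lines may fail when the Blocks graph is built. A hedged sketch of one way to get a similar effect with plain gr.Button components and a simplified single-output handler (answer_follow_up is an illustrative name, not from app.py):

import gradio as gr

def answer_follow_up(choice, history):
    follow_up_responses = {
        "Question 1": "This is the response to follow-up question 1.",
        "Question 2": "This is the response to follow-up question 2."
    }
    reply = follow_up_responses.get(choice, "Sorry, I didn't understand that choice.")
    return history + [(choice, reply)]

with gr.Blocks() as demo:
    chatbot = gr.Chatbot([])
    with gr.Row():
        # One plain button per canned follow-up question.
        q1 = gr.Button("Question 1")
        q2 = gr.Button("Question 2")

    # Each button closes over its own question text and appends the canned answer.
    q1.click(lambda history: answer_follow_up("Question 1", history), inputs=[chatbot], outputs=[chatbot])
    q2.click(lambda history: answer_follow_up("Question 2", history), inputs=[chatbot], outputs=[chatbot])

if __name__ == "__main__":
    demo.launch()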