Update app.py
app.py
CHANGED
@@ -356,34 +356,15 @@ Sure! Here's the information you requested:
 """
 
 
-# def generate_bot_response(history, choice, retrieval_mode, model_choice):
-#     if not history:
-#         return
-
-#     # Select the model
-#     # selected_model = chat_model if model_choice == "LM-1" else phi_pipe
-#     selected_model = chat_model if model_choice == "LM-1" else (chat_model1 if model_choice == "LM-3" else phi_pipe)
-
-
-#     response, addresses = generate_answer(history[-1][0], choice, retrieval_mode, selected_model)
-#     history[-1][1] = ""
-
-#     for character in response:
-#         history[-1][1] += character
-#         yield history  # Stream each character as it is generated
-#         time.sleep(0.05)  # Add a slight delay to simulate streaming
-
-#     yield history  # Final yield with the complete response
-
-
-# Modified bot function to separate chatbot response and TTS generation
 def generate_bot_response(history, choice, retrieval_mode, model_choice):
     if not history:
         return
 
     # Select the model
+    # selected_model = chat_model if model_choice == "LM-1" else phi_pipe
     selected_model = chat_model if model_choice == "LM-1" else (chat_model1 if model_choice == "LM-3" else phi_pipe)
 
+
     response, addresses = generate_answer(history[-1][0], choice, retrieval_mode, selected_model)
     history[-1][1] = ""
 
@@ -416,70 +397,34 @@ def generate_tts_response(response, tts_choice):
 
 
 
-# import concurrent.futures
-# # Existing bot function with concurrent futures for parallel processing
-# def bot(history, choice, tts_choice, retrieval_mode, model_choice):
-#     # Initialize an empty response
-#     response = ""
-
-#     # Create a thread pool to handle both text generation and TTS conversion in parallel
-#     with concurrent.futures.ThreadPoolExecutor() as executor:
-#         # Start the bot response generation in parallel
-#         bot_future = executor.submit(generate_bot_response, history, choice, retrieval_mode, model_choice)
-
-#         # Wait for the text generation to start
-#         for history_chunk in bot_future.result():
-#             response = history_chunk[-1][1]  # Update the response with the current state
-#             yield history_chunk, None  # Stream the text output as it's generated
-
-#         # Once text is fully generated, start the TTS conversion
-#         tts_future = executor.submit(generate_tts_response, response, tts_choice)
-
-#         # Get the audio output after TTS is done
-#         audio_path = tts_future.result()
-
-#         # Stream the final text and audio output
-#         yield history, audio_path
-
-
 import concurrent.futures
+# Existing bot function with concurrent futures for parallel processing
+def bot(history, choice, tts_choice, retrieval_mode, model_choice):
+    # Initialize an empty response
+    response = ""
 
-
+    # Create a thread pool to handle both text generation and TTS conversion in parallel
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        # Start the bot response generation in parallel
+        bot_future = executor.submit(generate_bot_response, history, choice, retrieval_mode, model_choice)
 
-
-
+        # Wait for the text generation to start
+        for history_chunk in bot_future.result():
+            response = history_chunk[-1][1]  # Update the response with the current state
+            yield history_chunk, None  # Stream the text output as it's generated
+
+        # Once text is fully generated, start the TTS conversion
+        tts_future = executor.submit(generate_tts_response, response, tts_choice)
 
-
-
-    audio_future = None
+        # Get the audio output after TTS is done
+        audio_path = tts_future.result()
 
-
-
-        response += chunk
-        history[-1][1] += chunk
-        yield history, None  # Stream the text output as it's generated
+        # Stream the final text and audio output
+        yield history, audio_path
 
-        # Start generating Parler TTS if selected and not started already
-        if tts_choice == "Beta" and audio_future is None:
-            audio_future = asyncio.create_task(generate_audio_parler_tts(response, callback=lambda audio_chunk: yield_audio(audio_chunk)))
 
-        # Wait for the audio to finish streaming if it was started
-        if audio_future is not None:
-            await audio_future
 
-def yield_audio(audio_chunk):
-    """ Stream audio in chunks to the output """
-    temp_audio_path = os.path.join(tempfile.gettempdir(), f"parler_tts_chunk_{int(time.time())}.wav")
-    write_wav(temp_audio_path, 16000, audio_chunk.astype(np.float32))
-    return temp_audio_path
 
-# Text generator as an async generator
-async def generate_text(history, choice, retrieval_mode, model_choice):
-    # Simulate text generation chunk by chunk
-    text_to_generate = "Generating text response..."
-    for char in text_to_generate:
-        await asyncio.sleep(0.05)  # Simulate time delay between character generation
-        yield char  # Yield each character as it's generated
 
 
 
@@ -507,21 +452,11 @@ def generate_bot_response(history, choice, retrieval_mode, model_choice):
 
 
 
-# def generate_audio_after_text(response, tts_choice):
-#     # Generate TTS audio after text response is completed
-#     with concurrent.futures.ThreadPoolExecutor() as executor:
-#         tts_future = executor.submit(generate_tts_response, response, tts_choice)
-#         audio_path = tts_future.result()
-#         return audio_path
-
 def generate_audio_after_text(response, tts_choice):
     # Generate TTS audio after text response is completed
     with concurrent.futures.ThreadPoolExecutor() as executor:
-
-
-        elif tts_choice == "Beta":
-            audio_future = executor.submit(generate_audio_parler_tts, response)  # Use the updated Parler TTS generator
-            audio_path = audio_future.result()
+        tts_future = executor.submit(generate_tts_response, response, tts_choice)
+        audio_path = tts_future.result()
         return audio_path
 
 import re
@@ -766,9 +701,9 @@ def generate_image(prompt):
     ).images[0]
     return image
 
-hardcoded_prompt_1 = "
-hardcoded_prompt_2 = "A
-hardcoded_prompt_3 = "
+hardcoded_prompt_1 = "A high quality cinematic image for Toyota Truck in Birmingham skyline shot in th style of Michael Mann"
+hardcoded_prompt_2 = "A high quality cinematic image for Alabama Quarterback close up emotional shot in th style of Michael Mann"
+hardcoded_prompt_3 = "A high quality cinematic image for Taylor Swift concert in Birmingham skyline style of Michael Mann"
 
 def update_images():
     image_1 = generate_image(hardcoded_prompt_1)
@@ -960,79 +895,6 @@ def generate_audio_elevenlabs(text):
 
 # chunking audio and then Process
 
-# import concurrent.futures
-# import tempfile
-# import os
-# import numpy as np
-# import logging
-# from queue import Queue
-# from threading import Thread
-# from scipy.io.wavfile import write as write_wav
-# from parler_tts import ParlerTTSForConditionalGeneration, ParlerTTSStreamer
-# from transformers import AutoTokenizer
-
-# # Ensure your device is set to CUDA
-# device = "cuda:0" if torch.cuda.is_available() else "cpu"
-
-# repo_id = "parler-tts/parler-tts-mini-v1"
-
-# def generate_audio_parler_tts(text):
-#     description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
-#     chunk_size_in_s = 0.5
-
-#     # Initialize the tokenizer and model
-#     parler_tokenizer = AutoTokenizer.from_pretrained(repo_id)
-#     parler_model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
-#     sampling_rate = parler_model.audio_encoder.config.sampling_rate
-#     frame_rate = parler_model.audio_encoder.config.frame_rate
-
-#     def generate(text, description, play_steps_in_s=0.5):
-#         play_steps = int(frame_rate * play_steps_in_s)
-#         streamer = ParlerTTSStreamer(parler_model, device=device, play_steps=play_steps)
-
-#         inputs = parler_tokenizer(description, return_tensors="pt").to(device)
-#         prompt = parler_tokenizer(text, return_tensors="pt").to(device)
-
-#         generation_kwargs = dict(
-#             input_ids=inputs.input_ids,
-#             prompt_input_ids=prompt.input_ids,
-#             attention_mask=inputs.attention_mask,
-#             prompt_attention_mask=prompt.attention_mask,
-#             streamer=streamer,
-#             do_sample=True,
-#             temperature=1.0,
-#             min_new_tokens=10,
-#         )
-
-#         thread = Thread(target=parler_model.generate, kwargs=generation_kwargs)
-#         thread.start()
-
-#         for new_audio in streamer:
-#             if new_audio.shape[0] == 0:
-#                 break
-#             # Save or process each audio chunk as it is generated
-#             yield sampling_rate, new_audio
-
-#     audio_segments = []
-#     for (sampling_rate, audio_chunk) in generate(text, description, chunk_size_in_s):
-#         audio_segments.append(audio_chunk)
-
-#         temp_audio_path = os.path.join(tempfile.gettempdir(), f"parler_tts_audio_chunk_{len(audio_segments)}.wav")
-#         write_wav(temp_audio_path, sampling_rate, audio_chunk.astype(np.float32))
-#         logging.debug(f"Saved chunk to {temp_audio_path}")
-
-
-#     # Combine all the audio chunks into one audio file
-#     combined_audio = np.concatenate(audio_segments)
-#     combined_audio_path = os.path.join(tempfile.gettempdir(), "parler_tts_combined_audio_stream.wav")
-
-#     write_wav(combined_audio_path, sampling_rate, combined_audio.astype(np.float32))
-
-#     logging.debug(f"Combined audio saved to {combined_audio_path}")
-#     return combined_audio_path
-
-
-import asyncio
 import concurrent.futures
 import tempfile
 import os
@@ -1049,10 +911,9 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
 repo_id = "parler-tts/parler-tts-mini-v1"
 
-
-async def generate_audio_parler_tts(text, callback=None):
+def generate_audio_parler_tts(text):
     description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
-    chunk_size_in_s =
+    chunk_size_in_s = 0.5
 
     # Initialize the tokenizer and model
     parler_tokenizer = AutoTokenizer.from_pretrained(repo_id)
@@ -1060,7 +921,7 @@ async def generate_audio_parler_tts(text, callback=None):
     sampling_rate = parler_model.audio_encoder.config.sampling_rate
     frame_rate = parler_model.audio_encoder.config.frame_rate
 
-    def generate(text, description, play_steps_in_s=
+    def generate(text, description, play_steps_in_s=0.5):
         play_steps = int(frame_rate * play_steps_in_s)
         streamer = ParlerTTSStreamer(parler_model, device=device, play_steps=play_steps)
 
@@ -1070,6 +931,8 @@ async def generate_audio_parler_tts(text, callback=None):
         generation_kwargs = dict(
             input_ids=inputs.input_ids,
             prompt_input_ids=prompt.input_ids,
+            attention_mask=inputs.attention_mask,
+            prompt_attention_mask=prompt.attention_mask,
             streamer=streamer,
             do_sample=True,
             temperature=1.0,
@@ -1082,26 +945,28 @@ async def generate_audio_parler_tts(text, callback=None):
         for new_audio in streamer:
             if new_audio.shape[0] == 0:
                 break
-
-            callback(new_audio)  # Send the chunk to the callback function for streaming
+            # Save or process each audio chunk as it is generated
             yield sampling_rate, new_audio
 
     audio_segments = []
     for (sampling_rate, audio_chunk) in generate(text, description, chunk_size_in_s):
         audio_segments.append(audio_chunk)
-        await asyncio.sleep(0)  # Allow other tasks to run
 
-
+        temp_audio_path = os.path.join(tempfile.gettempdir(), f"parler_tts_audio_chunk_{len(audio_segments)}.wav")
+        write_wav(temp_audio_path, sampling_rate, audio_chunk.astype(np.float32))
+        logging.debug(f"Saved chunk to {temp_audio_path}")
+
+
+    # Combine all the audio chunks into one audio file
     combined_audio = np.concatenate(audio_segments)
     combined_audio_path = os.path.join(tempfile.gettempdir(), "parler_tts_combined_audio_stream.wav")
+
     write_wav(combined_audio_path, sampling_rate, combined_audio.astype(np.float32))
 
     logging.debug(f"Combined audio saved to {combined_audio_path}")
     return combined_audio_path
 
 
-
-
 def fetch_local_events():
     api_key = os.environ['SERP_API']
     url = f'https://serpapi.com/search.json?engine=google_events&q=Events+in+Birmingham&hl=en&gl=us&api_key={api_key}'
@@ -1527,25 +1392,13 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
    #     .then(fn=clear_textbox, inputs=[], outputs=[chat_input],api_name="api_clear_textbox")
    # )
 
-    # retriever_sequence = (
-    #     retriever_button.click(fn=stop_audio, inputs=[], outputs=[audio_output], api_name="api_stop_audio_recording")
-    #     .then(fn=add_message, inputs=[chatbot, chat_input], outputs=[chatbot, chat_input], api_name="api_addprompt_chathistory")
-    #     # First, generate the bot response
-    #     .then(fn=generate_bot_response, inputs=[chatbot, choice, retrieval_mode, model_choice], outputs=[chatbot], api_name="api_generate_bot_response")
-    #     # Then, generate the TTS response based on the bot's response
-    #     .then(fn=generate_tts_response, inputs=[chatbot, tts_choice], outputs=[audio_output], api_name="api_generate_tts_response")
-    #     .then(fn=show_map_if_details, inputs=[chatbot, choice], outputs=[location_output, location_output], api_name="api_show_map_details")
-    #     .then(fn=clear_textbox, inputs=[], outputs=[chat_input], api_name="api_clear_textbox")
-    # )
-
-    # Gradio bot interaction with audio streaming
     retriever_sequence = (
         retriever_button.click(fn=stop_audio, inputs=[], outputs=[audio_output], api_name="api_stop_audio_recording")
         .then(fn=add_message, inputs=[chatbot, chat_input], outputs=[chatbot, chat_input], api_name="api_addprompt_chathistory")
         # First, generate the bot response
         .then(fn=generate_bot_response, inputs=[chatbot, choice, retrieval_mode, model_choice], outputs=[chatbot], api_name="api_generate_bot_response")
-        #
-        .then(fn=
+        # Then, generate the TTS response based on the bot's response
+        .then(fn=generate_tts_response, inputs=[chatbot, tts_choice], outputs=[audio_output], api_name="api_generate_tts_response")
         .then(fn=show_map_if_details, inputs=[chatbot, choice], outputs=[location_output, location_output], api_name="api_show_map_details")
         .then(fn=clear_textbox, inputs=[], outputs=[chat_input], api_name="api_clear_textbox")
     )
@@ -1564,25 +1417,14 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
    #     fn=clear_textbox, inputs=[], outputs=[chat_input],api_name="api_clear_textbox"
    # )
 
-    # chat_input.submit(fn=stop_audio, inputs=[], outputs=[audio_output], api_name="api_stop_audio_recording").then(
-    #     fn=add_message, inputs=[chatbot, chat_input], outputs=[chatbot, chat_input], api_name="api_addprompt_chathistory"
-    # ).then(
-    #     # First, generate the bot response
-    #     fn=generate_bot_response, inputs=[chatbot, choice, retrieval_mode, model_choice], outputs=[chatbot], api_name="api_generate_bot_response"
-    # ).then(
-    #     # Then, generate the TTS response based on the bot's response
-    #     fn=generate_tts_response, inputs=[chatbot, tts_choice], outputs=[audio_output], api_name="api_generate_tts_response"
-    # ).then(
-    #     fn=show_map_if_details, inputs=[chatbot, choice], outputs=[location_output, location_output], api_name="api_show_map_details"
-    # ).then(
-    #     fn=clear_textbox, inputs=[], outputs=[chat_input], api_name="api_clear_textbox"
-    # )
-
-    # The same logic for chat_input submission
     chat_input.submit(fn=stop_audio, inputs=[], outputs=[audio_output], api_name="api_stop_audio_recording").then(
         fn=add_message, inputs=[chatbot, chat_input], outputs=[chatbot, chat_input], api_name="api_addprompt_chathistory"
     ).then(
-
+        # First, generate the bot response
+        fn=generate_bot_response, inputs=[chatbot, choice, retrieval_mode, model_choice], outputs=[chatbot], api_name="api_generate_bot_response"
+    ).then(
+        # Then, generate the TTS response based on the bot's response
+        fn=generate_tts_response, inputs=[chatbot, tts_choice], outputs=[audio_output], api_name="api_generate_tts_response"
     ).then(
         fn=show_map_if_details, inputs=[chatbot, choice], outputs=[location_output, location_output], api_name="api_show_map_details"
     ).then(
@@ -1594,6 +1436,7 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
 
 
+
         audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy', every=0.1)
         audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="api_voice_to_text")
 
@@ -1614,11 +1457,4 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
     refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3], api_name="update_image")
 
 demo.queue()
-demo.launch(show_error=True)
-
-
-
-
-
-
-
+demo.launch(show_error=True)