Update app.py
app.py CHANGED
@@ -1,3 +1,13 @@
+import subprocess
+import sys
+
+def install_parler_tts():
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "git+https://github.com/huggingface/parler-tts.git"])
+
+# Call the function to install parler-tts
+install_parler_tts()
+
+
 import gradio as gr
 import requests
 import os
@@ -14,6 +24,7 @@ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 from googlemaps import Client as GoogleMapsClient
 from gtts import gTTS
 from diffusers import StableDiffusion3Pipeline
+import soundfile as sf
 
 from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain_pinecone import PineconeVectorStore
@@ -264,7 +275,7 @@ def generate_answer(message, choice):
     addresses = extract_addresses(response['output'])
     return response['output'], addresses
 
-def bot(history, choice):
+def bot(history, choice, tts_model):
     if not history:
         return history
     response, addresses = generate_answer(history[-1][0], choice)
@@ -272,7 +283,10 @@ def bot(history, choice):
 
     # Generate audio for the entire response in a separate thread
     with concurrent.futures.ThreadPoolExecutor() as executor:
-        audio_future = executor.submit(generate_audio_elevenlabs, response)
+        if tts_model == "ElevenLabs":
+            audio_future = executor.submit(generate_audio_elevenlabs, response)
+        else:
+            audio_future = executor.submit(generate_audio_parler_tts, response)
 
     for character in response:
         history[-1][1] += character
@@ -293,13 +307,6 @@ def extract_addresses(response):
     if not isinstance(response, str):
         response = str(response)
     address_patterns = [
-        # r'([A-Z].*,\sOmaha,\sNE\s\d{5})',
-        # r'(\d{4}\s.*,\sOmaha,\sNE\s\d{5})',
-        # r'([A-Z].*,\sNE\s\d{5})',
-        # r'([A-Z].*,.*\sSt,\sOmaha,\sNE\s\d{5})',
-        # r'([A-Z].*,.*\sStreets,\sOmaha,\sNE\s\d{5})',
-        # r'(\d{2}.*\sStreets)',
-        # r'([A-Z].*\s\d{2},\sOmaha,\sNE\s\d{5})'
         r'([A-Z].*,\sBirmingham,\sAL\s\d{5})',
         r'(\d{4}\s.*,\sBirmingham,\sAL\s\d{5})',
         r'([A-Z].*,\sAL\s\d{5})',
@@ -415,6 +422,8 @@ def fetch_local_news():
 import numpy as np
 import torch
 from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
+from parler_tts import ParlerTTSForConditionalGeneration
+from transformers import AutoTokenizer
 
 model_id = 'openai/whisper-large-v3'
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -497,6 +506,27 @@ def generate_audio_elevenlabs(text):
         logging.error(f"Error generating audio: {response.text}")
         return None
 
+def generate_audio_parler_tts(text):
+    model_id = 'parler-tts/parler_tts_mini_v0.1'
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
+
+    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
+    prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
+
+    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
+    audio_arr = generation.cpu().numpy().squeeze()
+
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+        sf.write(f.name, audio_arr, model.config.sampling_rate)
+        temp_audio_path = f.name
+
+    logging.debug(f"Audio saved to {temp_audio_path}")
+    return temp_audio_path
+
 # Stable Diffusion setup
 pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
 pipe = pipe.to("cuda")
@@ -521,8 +551,6 @@ def update_images():
     image_3 = generate_image(hardcoded_prompt_3)
     return image_1, image_2, image_3
 
-
-
 with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
     with gr.Row():
@@ -531,11 +559,12 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
             chatbot = gr.Chatbot([], elem_id="RADAR:Channel 94.1", bubble_full_width=False)
             choice = gr.Radio(label="Select Style", choices=["Details", "Conversational"], value="Conversational")
+            tts_choice = gr.Radio(label="Select TTS Model", choices=["ElevenLabs", "Parler TTS"], value="Parler TTS")
 
             gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
             chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
             chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
-            bot_msg = chat_msg.then(bot, [chatbot, choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
+            bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
             bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
             chatbot.like(print_like_dislike, None, None)
             clear_button = gr.Button("Clear")
@@ -566,4 +595,3 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
 demo.queue()
 demo.launch(share=True)
-
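
Below is a minimal standalone sketch of the Parler TTS path introduced in this commit, useful for smoke-testing the model outside the Gradio app. It mirrors the call pattern of the new generate_audio_parler_tts(); it assumes parler-tts and soundfile are installed, and the sample prompt and the output filename parler_sample.wav are illustrative only.

# Standalone smoke test of the Parler TTS path added in this commit.
# Assumes: pip install soundfile git+https://github.com/huggingface/parler-tts.git
import torch
import soundfile as sf
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer

device = "cuda:0" if torch.cuda.is_available() else "cpu"
model_id = "parler-tts/parler_tts_mini_v0.1"
model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# The description conditions the voice; the prompt is the text to be spoken.
description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
prompt = "Welcome to RADAR on Channel 94.1."  # illustrative sample text

input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

# Generate a waveform and write it at the model's native sampling rate.
audio_arr = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids).cpu().numpy().squeeze()
sf.write("parler_sample.wav", audio_arr, model.config.sampling_rate)

Note that bot() picks the backend by comparing tts_model against the literal string "ElevenLabs"; any other value from the tts_choice radio, including "Parler TTS", falls through to the Parler path.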