Update app.py
app.py CHANGED
@@ -1,3 +1,13 @@
+import subprocess
+import sys
+
+def install_parler_tts():
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "git+https://github.com/huggingface/parler-tts.git"])
+
+# Call the function to install parler-tts
+install_parler_tts()
+
+
 import gradio as gr
 import requests
 import os
@@ -14,6 +24,7 @@ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 from googlemaps import Client as GoogleMapsClient
 from gtts import gTTS
 from diffusers import StableDiffusion3Pipeline
+import soundfile as sf
 
 from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain_pinecone import PineconeVectorStore
@@ -264,7 +275,7 @@ def generate_answer(message, choice):
     addresses = extract_addresses(response['output'])
     return response['output'], addresses
 
-def bot(history, choice):
+def bot(history, choice, tts_model):
     if not history:
         return history
     response, addresses = generate_answer(history[-1][0], choice)
@@ -272,7 +283,10 @@ def bot(history, choice):
 
     # Generate audio for the entire response in a separate thread
     with concurrent.futures.ThreadPoolExecutor() as executor:
-        audio_future = executor.submit(generate_audio_elevenlabs, response)
+        if tts_model == "ElevenLabs":
+            audio_future = executor.submit(generate_audio_elevenlabs, response)
+        else:
+            audio_future = executor.submit(generate_audio_parler_tts, response)
 
     for character in response:
         history[-1][1] += character
@@ -293,13 +307,6 @@ def extract_addresses(response):
     if not isinstance(response, str):
         response = str(response)
     address_patterns = [
-        # r'([A-Z].*,\sOmaha,\sNE\s\d{5})',
-        # r'(\d{4}\s.*,\sOmaha,\sNE\s\d{5})',
-        # r'([A-Z].*,\sNE\s\d{5})',
-        # r'([A-Z].*,.*\sSt,\sOmaha,\sNE\s\d{5})',
-        # r'([A-Z].*,.*\sStreets,\sOmaha,\sNE\s\d{5})',
-        # r'(\d{2}.*\sStreets)',
-        # r'([A-Z].*\s\d{2},\sOmaha,\sNE\s\d{5})'
         r'([A-Z].*,\sBirmingham,\sAL\s\d{5})',
         r'(\d{4}\s.*,\sBirmingham,\sAL\s\d{5})',
         r'([A-Z].*,\sAL\s\d{5})',
@@ -415,6 +422,8 @@ def fetch_local_news():
 import numpy as np
 import torch
 from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
+from parler_tts import ParlerTTSForConditionalGeneration
+from transformers import AutoTokenizer
 
 model_id = 'openai/whisper-large-v3'
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -497,6 +506,27 @@ def generate_audio_elevenlabs(text):
         logging.error(f"Error generating audio: {response.text}")
         return None
 
+def generate_audio_parler_tts(text):
+    model_id = 'parler-tts/parler_tts_mini_v0.1'
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
+
+    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
+    prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
+
+    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
+    audio_arr = generation.cpu().numpy().squeeze()
+
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+        sf.write(f.name, audio_arr, model.config.sampling_rate)
+        temp_audio_path = f.name
+
+    logging.debug(f"Audio saved to {temp_audio_path}")
+    return temp_audio_path
+
 # Stable Diffusion setup
 pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
 pipe = pipe.to("cuda")
@@ -521,8 +551,6 @@ def update_images():
     image_3 = generate_image(hardcoded_prompt_3)
     return image_1, image_2, image_3
 
-
-
 with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
     with gr.Row():
@@ -531,11 +559,12 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
             chatbot = gr.Chatbot([], elem_id="RADAR:Channel 94.1", bubble_full_width=False)
             choice = gr.Radio(label="Select Style", choices=["Details", "Conversational"], value="Conversational")
+            tts_choice = gr.Radio(label="Select TTS Model", choices=["ElevenLabs", "Parler TTS"], value="Parler TTS")
 
             gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
             chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
             chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
-            bot_msg = chat_msg.then(bot, [chatbot, choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
+            bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
             bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
             chatbot.like(print_like_dislike, None, None)
             clear_button = gr.Button("Clear")
@@ -566,4 +595,3 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
 demo.queue()
 demo.launch(share=True)
-
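
Below is a minimal standalone sketch of the Parler TTS path introduced in this commit, useful for smoke-testing the model outside the Gradio app. It mirrors the call pattern of the new generate_audio_parler_tts(); it assumes parler-tts and soundfile are installed, and the sample prompt and the output filename parler_sample.wav are illustrative only.

# Standalone smoke test of the Parler TTS path added in this commit.
# Assumes: pip install soundfile git+https://github.com/huggingface/parler-tts.git
import torch
import soundfile as sf
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer

device = "cuda:0" if torch.cuda.is_available() else "cpu"
model_id = "parler-tts/parler_tts_mini_v0.1"
model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# The description conditions the voice; the prompt is the text to be spoken.
description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
prompt = "Welcome to RADAR on Channel 94.1."  # illustrative sample text

input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

# Generate a waveform and write it at the model's native sampling rate.
audio_arr = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids).cpu().numpy().squeeze()
sf.write("parler_sample.wav", audio_arr, model.config.sampling_rate)

Note that bot() picks the backend by comparing tts_model against the literal string "ElevenLabs"; any other value from the tts_choice radio, including "Parler TTS", falls through to the Parler path.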