Update app.py
app.py CHANGED
@@ -1,3 +1,13 @@
+import subprocess
+import sys
+
+def install_parler_tts():
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "git+https://github.com/huggingface/parler-tts.git"])
+
+# Call the function to install parler-tts
+install_parler_tts()
+
+
 import gradio as gr
 import requests
 import os
@@ -14,6 +24,7 @@ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 from googlemaps import Client as GoogleMapsClient
 from gtts import gTTS
 from diffusers import StableDiffusion3Pipeline
+import soundfile as sf
 
 from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain_pinecone import PineconeVectorStore
@@ -264,7 +275,7 @@ def generate_answer(message, choice):
     addresses = extract_addresses(response['output'])
     return response['output'], addresses
 
-def bot(history, choice):
+def bot(history, choice, tts_model):
     if not history:
         return history
     response, addresses = generate_answer(history[-1][0], choice)
@@ -272,7 +283,10 @@ def bot(history, choice):
 
     # Generate audio for the entire response in a separate thread
    with concurrent.futures.ThreadPoolExecutor() as executor:
-        audio_future = executor.submit(generate_audio_elevenlabs, response)
+        if tts_model == "ElevenLabs":
+            audio_future = executor.submit(generate_audio_elevenlabs, response)
+        else:
+            audio_future = executor.submit(generate_audio_parler_tts, response)
 
     for character in response:
         history[-1][1] += character
@@ -293,13 +307,6 @@ def extract_addresses(response):
     if not isinstance(response, str):
         response = str(response)
     address_patterns = [
-        # r'([A-Z].*,\sOmaha,\sNE\s\d{5})',
-        # r'(\d{4}\s.*,\sOmaha,\sNE\s\d{5})',
-        # r'([A-Z].*,\sNE\s\d{5})',
-        # r'([A-Z].*,.*\sSt,\sOmaha,\sNE\s\d{5})',
-        # r'([A-Z].*,.*\sStreets,\sOmaha,\sNE\s\d{5})',
-        # r'(\d{2}.*\sStreets)',
-        # r'([A-Z].*\s\d{2},\sOmaha,\sNE\s\d{5})'
         r'([A-Z].*,\sBirmingham,\sAL\s\d{5})',
         r'(\d{4}\s.*,\sBirmingham,\sAL\s\d{5})',
         r'([A-Z].*,\sAL\s\d{5})',
@@ -415,6 +422,8 @@ def fetch_local_news():
 import numpy as np
 import torch
 from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
+from parler_tts import ParlerTTSForConditionalGeneration
+from transformers import AutoTokenizer
 
 model_id = 'openai/whisper-large-v3'
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -497,6 +506,27 @@ def generate_audio_elevenlabs(text):
         logging.error(f"Error generating audio: {response.text}")
         return None
 
+def generate_audio_parler_tts(text):
+    model_id = 'parler-tts/parler_tts_mini_v0.1'
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
+
+    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
+    prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
+
+    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
+    audio_arr = generation.cpu().numpy().squeeze()
+
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+        sf.write(f.name, audio_arr, model.config.sampling_rate)
+        temp_audio_path = f.name
+
+    logging.debug(f"Audio saved to {temp_audio_path}")
+    return temp_audio_path
+
 # Stable Diffusion setup
 pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
 pipe = pipe.to("cuda")
@@ -521,8 +551,6 @@ def update_images():
     image_3 = generate_image(hardcoded_prompt_3)
     return image_1, image_2, image_3
 
-
-
 with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
     with gr.Row():
@@ -531,11 +559,12 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
         chatbot = gr.Chatbot([], elem_id="RADAR:Channel 94.1", bubble_full_width=False)
         choice = gr.Radio(label="Select Style", choices=["Details", "Conversational"], value="Conversational")
+        tts_choice = gr.Radio(label="Select TTS Model", choices=["ElevenLabs", "Parler TTS"], value="Parler TTS")
 
         gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
         chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
         chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
-        bot_msg = chat_msg.then(bot, [chatbot, choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
+        bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
         bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
         chatbot.like(print_like_dislike, None, None)
         clear_button = gr.Button("Clear")
@@ -566,4 +595,3 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
 demo.queue()
 demo.launch(share=True)
-
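
For reference, the new Parler TTS path can be exercised outside the app. A minimal sketch, assuming parler-tts is installed and mirroring the calls this commit adds; the sample text and output filename are illustrative:

import torch
import soundfile as sf
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer

model_id = "parler-tts/parler_tts_mini_v0.1"
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load the checkpoint and tokenizer once for this standalone test.
model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Parler TTS conditions on a natural-language voice description plus the text to speak.
description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively."
input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
prompt_input_ids = tokenizer("Hello from RADAR.", return_tensors="pt").input_ids.to(device)

# Generate a waveform and write it out at the model's native sampling rate.
generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
sf.write("radar_sample.wav", generation.cpu().numpy().squeeze(), model.config.sampling_rate)

Because the Space installs parler-tts at startup via install_parler_tts(), the same imports work inside app.py once that call has run.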
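One design note on the commit: generate_audio_parler_tts calls from_pretrained for the model and tokenizer on every chat turn, whereas the Stable Diffusion pipeline is built once at module level. A hedged sketch of a lazy singleton that would give the Parler path the same behavior; get_parler_tts is a hypothetical helper, not part of this commit:

import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer

_PARLER_ID = "parler-tts/parler_tts_mini_v0.1"
_parler_model = None
_parler_tokenizer = None

def get_parler_tts():
    # Hypothetical helper: load the Parler TTS model and tokenizer on
    # first use, then reuse the cached instances on later calls.
    global _parler_model, _parler_tokenizer
    if _parler_model is None:
        device = "cuda:0" if torch.cuda.is_available() else "cpu"
        _parler_model = ParlerTTSForConditionalGeneration.from_pretrained(_PARLER_ID).to(device)
        _parler_tokenizer = AutoTokenizer.from_pretrained(_PARLER_ID)
    return _parler_model, _parler_tokenizer

generate_audio_parler_tts could then call get_parler_tts() in place of its two from_pretrained calls, keeping only the first request slow.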