Pijush2023 committed (verified)
Commit e5a569c · 1 Parent(s): bba393e

Update app.py

Files changed (1): app.py (+41, -13)
app.py CHANGED
@@ -1,3 +1,13 @@
+import subprocess
+import sys
+
+def install_parler_tts():
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "git+https://github.com/huggingface/parler-tts.git"])
+
+# Call the function to install parler-tts
+install_parler_tts()
+
+
 import gradio as gr
 import requests
 import os
@@ -14,6 +24,7 @@ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 from googlemaps import Client as GoogleMapsClient
 from gtts import gTTS
 from diffusers import StableDiffusion3Pipeline
+import soundfile as sf
 
 from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain_pinecone import PineconeVectorStore
@@ -264,7 +275,7 @@ def generate_answer(message, choice):
     addresses = extract_addresses(response['output'])
     return response['output'], addresses
 
-def bot(history, choice):
+def bot(history, choice, tts_model):
     if not history:
         return history
     response, addresses = generate_answer(history[-1][0], choice)
@@ -272,7 +283,10 @@ def bot(history, choice):
 
     # Generate audio for the entire response in a separate thread
     with concurrent.futures.ThreadPoolExecutor() as executor:
-        audio_future = executor.submit(generate_audio_elevenlabs, response)
+        if tts_model == "ElevenLabs":
+            audio_future = executor.submit(generate_audio_elevenlabs, response)
+        else:
+            audio_future = executor.submit(generate_audio_parler_tts, response)
 
     for character in response:
         history[-1][1] += character
@@ -293,13 +307,6 @@ def extract_addresses(response):
     if not isinstance(response, str):
         response = str(response)
     address_patterns = [
-        # r'([A-Z].*,\sOmaha,\sNE\s\d{5})',
-        # r'(\d{4}\s.*,\sOmaha,\sNE\s\d{5})',
-        # r'([A-Z].*,\sNE\s\d{5})',
-        # r'([A-Z].*,.*\sSt,\sOmaha,\sNE\s\d{5})',
-        # r'([A-Z].*,.*\sStreets,\sOmaha,\sNE\s\d{5})',
-        # r'(\d{2}.*\sStreets)',
-        # r'([A-Z].*\s\d{2},\sOmaha,\sNE\s\d{5})'
         r'([A-Z].*,\sBirmingham,\sAL\s\d{5})',
         r'(\d{4}\s.*,\sBirmingham,\sAL\s\d{5})',
         r'([A-Z].*,\sAL\s\d{5})',
@@ -415,6 +422,8 @@ def fetch_local_news():
 import numpy as np
 import torch
 from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
+from parler_tts import ParlerTTSForConditionalGeneration
+from transformers import AutoTokenizer
 
 model_id = 'openai/whisper-large-v3'
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -497,6 +506,27 @@ def generate_audio_elevenlabs(text):
         logging.error(f"Error generating audio: {response.text}")
         return None
 
+def generate_audio_parler_tts(text):
+    model_id = 'parler-tts/parler_tts_mini_v0.1'
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
+
+    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
+    prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
+
+    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
+    audio_arr = generation.cpu().numpy().squeeze()
+
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+        sf.write(f.name, audio_arr, model.config.sampling_rate)
+        temp_audio_path = f.name
+
+    logging.debug(f"Audio saved to {temp_audio_path}")
+    return temp_audio_path
+
 # Stable Diffusion setup
 pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
 pipe = pipe.to("cuda")
@@ -521,8 +551,6 @@ def update_images():
     image_3 = generate_image(hardcoded_prompt_3)
     return image_1, image_2, image_3
 
-
-
 with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
     with gr.Row():
@@ -531,11 +559,12 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
         chatbot = gr.Chatbot([], elem_id="RADAR:Channel 94.1", bubble_full_width=False)
         choice = gr.Radio(label="Select Style", choices=["Details", "Conversational"], value="Conversational")
+        tts_choice = gr.Radio(label="Select TTS Model", choices=["ElevenLabs", "Parler TTS"], value="Parler TTS")
 
         gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
         chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
         chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
-        bot_msg = chat_msg.then(bot, [chatbot, choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
+        bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
         bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
        chatbot.like(print_like_dislike, None, None)
        clear_button = gr.Button("Clear")
@@ -566,4 +595,3 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
 demo.queue()
 demo.launch(share=True)
-
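
For reference, a minimal standalone sketch of the Parler-TTS generation path this commit wires in (not part of the commit itself). It assumes the parler-tts package and soundfile are already installed, reuses the model id, voice description, and generate() call from the diff above, and writes to a fixed file instead of a tempfile; the output filename and sample text are illustrative only.

import torch
import soundfile as sf
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer

# Model id and voice description copied from generate_audio_parler_tts above.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler_tts_mini_v0.1").to(device)
tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler_tts_mini_v0.1")

description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
text = "Welcome to RADAR on Channel 94.1."  # illustrative sample text

# The description conditions the voice; the text is the prompt to be spoken.
input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)

generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
sf.write("parler_demo.wav", generation.cpu().numpy().squeeze(), model.config.sampling_rate)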