Pijush2023 committed
Commit d66d787 · verified · 1 Parent(s): f14ffd4

Update app.py

Files changed (1): app.py (+66, -67)
app.py CHANGED
@@ -24,6 +24,7 @@ from googlemaps import Client as GoogleMapsClient
 from gtts import gTTS
 from diffusers import StableDiffusion3Pipeline
 import soundfile as sf
+import numpy as np
 
 from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain_pinecone import PineconeVectorStore
@@ -283,20 +284,74 @@ def bot(history, choice, tts_model):
 
     # Generate audio and process output prompt in parallel
     with concurrent.futures.ThreadPoolExecutor() as executor:
-        audio_future = executor.submit(generate_audio, tts_model, response)
-        for character in response:
-            history[-1][1] += character
-            time.sleep(0.05)  # Adjust the speed of text appearance
-            yield history, None
+        audio_future = executor.submit(generate_audio_chunks, tts_model, response)
+        text_chunks = [response[i:i + 100] for i in range(0, len(response), 100)]  # Chunk the text for streaming
+        for text_chunk in text_chunks:
+            history[-1][1] += text_chunk
+            audio_chunk = next(audio_future.result(), None)
+            time.sleep(0.2)  # Adjust this to synchronize text and audio appearance
+            yield history, audio_chunk
 
-        audio_path = audio_future.result()
-        yield history, audio_path
+        for remaining_audio_chunk in audio_future.result():
+            yield history, remaining_audio_chunk
 
-def generate_audio(tts_model, text):
+def generate_audio_chunks(tts_model, text):
     if tts_model == "ElevenLabs":
-        return generate_audio_elevenlabs(text)
+        return generate_audio_elevenlabs_chunks(text)
+    else:
+        return generate_audio_parler_tts_chunks(text)
+
+def generate_audio_elevenlabs_chunks(text):
+    XI_API_KEY = os.environ['ELEVENLABS_API']
+    VOICE_ID = 'd9MIrwLnvDeH7aZb61E9'  # Replace with your voice ID
+    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
+    headers = {
+        "Accept": "application/json",
+        "xi-api-key": XI_API_KEY
+    }
+    data = {
+        "text": str(text),
+        "model_id": "eleven_multilingual_v2",
+        "voice_settings": {
+            "stability": 1.0,
+            "similarity_boost": 0.0,
+            "style": 0.60,  # Adjust style for more romantic tone
+            "use_speaker_boost": False
+        }
+    }
+    response = requests.post(tts_url, headers=headers, json=data, stream=True)
+    if response.ok:
+        for chunk in response.iter_content(chunk_size=1024):
+            yield chunk
     else:
-        return generate_audio_parler_tts(text)
+        logging.error(f"Error generating audio: {response.text}")
+        return None
+
+def generate_audio_parler_tts_chunks(text):
+    model_id = 'parler-tts/parler_tts_mini_v0.1'
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    try:
+        model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
+    except torch.cuda.OutOfMemoryError:
+        print("CUDA out of memory. Switching to CPU.")
+        device = "cpu"
+        model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
+
+    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
+    prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
+
+    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids, max_new_tokens=200)
+    audio_arr = generation.cpu().numpy().squeeze()
+
+    chunk_size = 16000  # Define a chunk size (adjust as needed)
+    for i in range(0, len(audio_arr), chunk_size):
+        audio_chunk = audio_arr[i:i + chunk_size]
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+            sf.write(f.name, audio_chunk, model.config.sampling_rate)
+            yield f.name
 
 def add_message(history, message):
     history.append((message, None))
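Note: the hunk above swaps the single blocking generate_audio call for generator-based chunking, so text and audio can be yielded together. Below is a minimal, self-contained sketch of that pattern; fake_tts_chunks, the chunk sizes, and the print loop are placeholders for illustration, not code from app.py.

import concurrent.futures
import time


def fake_tts_chunks(text):
    # Placeholder standing in for generate_audio_elevenlabs_chunks /
    # generate_audio_parler_tts_chunks: yields one synthetic "audio" chunk
    # per 20 characters of input.
    for i in range(0, len(text), 20):
        time.sleep(0.05)  # stand-in for synthesis latency
        yield f"<audio bytes for chars {i}-{i + 20}>"


def stream_text_and_audio(response, text_chunk_size=100):
    # Mirrors the commit's bot() loop: text is emitted in fixed-size chunks
    # while audio chunks are pulled from the generator produced by the worker.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        audio_future = executor.submit(fake_tts_chunks, response)
        # submit() only creates the generator object; the synthesis work runs
        # lazily in the consuming thread each time next() is called.
        audio_gen = audio_future.result()
        text_chunks = [response[i:i + text_chunk_size]
                       for i in range(0, len(response), text_chunk_size)]
        shown = ""
        for text_chunk in text_chunks:
            shown += text_chunk
            yield shown, next(audio_gen, None)  # None once audio is exhausted
        for remaining_chunk in audio_gen:       # drain leftover audio chunks
            yield shown, remaining_chunk


if __name__ == "__main__":
    for text_so_far, audio_chunk in stream_text_and_audio("lorem ipsum dolor sit amet " * 20):
        print(len(text_so_far), audio_chunk)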
@@ -350,7 +405,7 @@ def fetch_local_news():
     api_key = os.environ['SERP_API']
     url = f'https://serpapi.com/search.json?engine=google_news&q=birmingham headline&api_key={api_key}'
     response = requests.get(url)
-    if response.status_code == 200:
+    if response.status_code == 200:
         results = response.json().get("news_results", [])
         news_html = """
         <h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Birmingham Today</h2>
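For reference, a standalone sketch of the SerpAPI request that fetch_local_news() issues in the hunk above, written with a params dict and the same status-code check. The SERP_API environment variable name comes from the app; the timeout and the empty-list fallback are assumptions added here.

import os
import requests


def fetch_birmingham_headlines():
    # Same Google News query that fetch_local_news() builds into its URL string.
    params = {
        "engine": "google_news",
        "q": "birmingham headline",
        "api_key": os.environ["SERP_API"],
    }
    response = requests.get("https://serpapi.com/search.json", params=params, timeout=30)
    if response.status_code == 200:
        return response.json().get("news_results", [])
    return []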
@@ -478,62 +533,6 @@ def show_map_if_details(history,choice):
     else:
         return gr.update(visible=False), ""
 
-def generate_audio_elevenlabs(text):
-    XI_API_KEY = os.environ['ELEVENLABS_API']
-    VOICE_ID = 'd9MIrwLnvDeH7aZb61E9'  # Replace with your voice ID
-    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
-    headers = {
-        "Accept": "application/json",
-        "xi-api-key": XI_API_KEY
-    }
-    data = {
-        "text": str(text),
-        "model_id": "eleven_multilingual_v2",
-        "voice_settings": {
-            "stability": 1.0,
-            "similarity_boost": 0.0,
-            "style": 0.60,  # Adjust style for more romantic tone
-            "use_speaker_boost": False
-        }
-    }
-    response = requests.post(tts_url, headers=headers, json=data, stream=True)
-    if response.ok:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
-            for chunk in response.iter_content(chunk_size=1024):
-                f.write(chunk)
-            temp_audio_path = f.name
-        logging.debug(f"Audio saved to {temp_audio_path}")
-        return temp_audio_path
-    else:
-        logging.error(f"Error generating audio: {response.text}")
-        return None
-
-def generate_audio_parler_tts(text):
-    model_id = 'parler-tts/parler_tts_mini_v0.1'
-    device = "cuda:0" if torch.cuda.is_available() else "cpu"
-    try:
-        model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
-    except torch.cuda.OutOfMemoryError:
-        print("CUDA out of memory. Switching to CPU.")
-        device = "cpu"
-        model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-    description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
-
-    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
-    prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
-
-    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids, max_new_tokens=200)
-    audio_arr = generation.cpu().numpy().squeeze()
-
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
-        sf.write(f.name, audio_arr, model.config.sampling_rate)
-        temp_audio_path = f.name
-
-    logging.debug(f"Audio saved to {temp_audio_path}")
-    return temp_audio_path
-
 # Stable Diffusion setup
 pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
 pipe = pipe.to("cuda")
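The closing context lines load the Stable Diffusion 3 pipeline onto the GPU. As a rough sketch of how such a pipeline is typically invoked with diffusers (the prompt, step count, and guidance scale below are illustrative placeholders, not values from app.py):

import torch
from diffusers import StableDiffusion3Pipeline

pipe = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16
)
pipe = pipe.to("cuda")

# Generate a single image; parameters here are illustrative defaults.
image = pipe(
    "a skyline view of Birmingham at sunset",
    num_inference_steps=28,
    guidance_scale=7.0,
).images[0]
image.save("birmingham.png")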