Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -24,6 +24,7 @@ from googlemaps import Client as GoogleMapsClient
|
|
24 |
from gtts import gTTS
|
25 |
from diffusers import StableDiffusion3Pipeline
|
26 |
import soundfile as sf
|
|
|
27 |
|
28 |
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
29 |
from langchain_pinecone import PineconeVectorStore
|
@@ -283,20 +284,74 @@ def bot(history, choice, tts_model):
|
|
283 |
|
284 |
# Generate audio and process output prompt in parallel
|
285 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
286 |
-
audio_future = executor.submit(
|
287 |
-
for
|
288 |
-
|
289 |
-
|
290 |
-
|
|
|
|
|
291 |
|
292 |
-
|
293 |
-
|
294 |
|
295 |
-
def
|
296 |
if tts_model == "ElevenLabs":
|
297 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
else:
|
299 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
300 |
|
301 |
def add_message(history, message):
|
302 |
history.append((message, None))
|
@@ -350,7 +405,7 @@ def fetch_local_news():
|
|
350 |
api_key = os.environ['SERP_API']
|
351 |
url = f'https://serpapi.com/search.json?engine=google_news&q=birmingham headline&api_key={api_key}'
|
352 |
response = requests.get(url)
|
353 |
-
if response.status_code == 200:
|
354 |
results = response.json().get("news_results", [])
|
355 |
news_html = """
|
356 |
<h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Birmingham Today</h2>
|
@@ -478,62 +533,6 @@ def show_map_if_details(history,choice):
|
|
478 |
else:
|
479 |
return gr.update(visible=False), ""
|
480 |
|
481 |
-
def generate_audio_elevenlabs(text, timeout=30):
    """Synthesize ``text`` via the ElevenLabs streaming endpoint into a temp file.

    Args:
        text: Text to speak; converted with ``str()`` before being sent.
        timeout: Connect/read timeout in seconds passed to ``requests.post``
            so a stalled request cannot block the caller indefinitely.

    Returns:
        Path of a temporary file (``.mp3`` suffix) holding the response body,
        or ``None`` when the request fails (the failure is logged).

    Raises:
        KeyError: If the ``ELEVENLABS_API`` environment variable is not set.
    """
    XI_API_KEY = os.environ['ELEVENLABS_API']
    VOICE_ID = 'd9MIrwLnvDeH7aZb61E9'  # Replace with your voice ID
    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
    headers = {
        "Accept": "application/json",
        "xi-api-key": XI_API_KEY
    }
    data = {
        "text": str(text),
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 1.0,
            "similarity_boost": 0.0,
            "style": 0.60,  # Adjust style for more romantic tone
            "use_speaker_boost": False
        }
    }

    temp_audio_path = None
    try:
        # The context manager closes the streamed response, releasing the
        # connection even if writing the file fails part-way.
        with requests.post(tts_url, headers=headers, json=data, stream=True,
                           timeout=timeout) as response:
            if not response.ok:
                logging.error(f"Error generating audio: {response.text}")
                return None
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
                temp_audio_path = f.name
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)
    except requests.RequestException as exc:
        # Treat network failures like the HTTP-error path: log and return None.
        logging.error(f"Error generating audio: {exc}")
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)  # drop the partially written file
            except OSError:
                pass  # best-effort cleanup; nothing more to do here
        return None

    logging.debug(f"Audio saved to {temp_audio_path}")
    return temp_audio_path
|
510 |
-
|
511 |
-
def generate_audio_parler_tts(text):
    """Generate speech for ``text`` with Parler-TTS and save it as a WAV file.

    The model and tokenizer are loaded from ``parler-tts/parler_tts_mini_v0.1``
    on every call. CUDA is used when available, falling back to CPU if moving
    the model to the GPU raises an out-of-memory error.

    Args:
        text: Prompt text; tokenized and passed as ``prompt_input_ids``.

    Returns:
        Path of a temporary ``.wav`` file containing the generated audio.
        The file is created with ``delete=False`` and is not removed here.
    """
    model_id = 'parler-tts/parler_tts_mini_v0.1'
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    try:
        model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
    except torch.cuda.OutOfMemoryError:
        logging.warning("CUDA out of memory. Switching to CPU.")
        device = "cpu"
        model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # The voice description is tokenized into ``input_ids``; the text to speak
    # goes in ``prompt_input_ids``.
    description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."

    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
    prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)

    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids, max_new_tokens=200)
    audio_arr = generation.cpu().numpy().squeeze()

    # Reserve a unique path, then close the handle before soundfile opens it:
    # reopening a still-open NamedTemporaryFile by name fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        temp_audio_path = f.name
    sf.write(temp_audio_path, audio_arr, model.config.sampling_rate)

    logging.debug(f"Audio saved to {temp_audio_path}")
    return temp_audio_path
|
536 |
-
|
537 |
# Stable Diffusion setup
|
538 |
pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
|
539 |
pipe = pipe.to("cuda")
|
|
|
24 |
from gtts import gTTS
|
25 |
from diffusers import StableDiffusion3Pipeline
|
26 |
import soundfile as sf
|
27 |
+
import numpy as np
|
28 |
|
29 |
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
30 |
from langchain_pinecone import PineconeVectorStore
|
|
|
284 |
|
285 |
# Generate audio and process output prompt in parallel
|
286 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
287 |
+
audio_future = executor.submit(generate_audio_chunks, tts_model, response)
|
288 |
+
text_chunks = [response[i:i + 100] for i in range(0, len(response), 100)] # Chunk the text for streaming
|
289 |
+
for text_chunk in text_chunks:
|
290 |
+
history[-1][1] += text_chunk
|
291 |
+
audio_chunk = next(audio_future.result(), None)
|
292 |
+
time.sleep(0.2) # Adjust this to synchronize text and audio appearance
|
293 |
+
yield history, audio_chunk
|
294 |
|
295 |
+
for remaining_audio_chunk in audio_future.result():
|
296 |
+
yield history, remaining_audio_chunk
|
297 |
|
298 |
+
def generate_audio_chunks(tts_model, text):
    """Dispatch ``text`` to the chunked TTS backend selected by ``tts_model``.

    Args:
        tts_model: Backend name; ``"ElevenLabs"`` selects the ElevenLabs
            backend, any other value selects Parler-TTS.
        text: Text to synthesize.

    Returns:
        Whatever the selected ``generate_audio_*_chunks`` function returns.
    """
    wants_elevenlabs = tts_model == "ElevenLabs"
    # Only the selected backend name is looked up and called.
    backend = (
        generate_audio_elevenlabs_chunks
        if wants_elevenlabs
        else generate_audio_parler_tts_chunks
    )
    return backend(text)
|
303 |
+
|
304 |
+
def generate_audio_elevenlabs_chunks(text, timeout=30):
    """Stream ElevenLabs text-to-speech output for ``text`` as raw byte chunks.

    This is a generator, so the HTTP request is only sent once the first chunk
    is requested.

    Args:
        text: Text to synthesize; converted with ``str()`` before being sent.
        timeout: Connect/read timeout in seconds passed to ``requests.post``
            so a stalled request cannot block the consumer indefinitely.

    Yields:
        Non-empty ``bytes`` chunks (at most 1024 bytes each) of the response
        body. Nothing is yielded when the request fails; the error is logged.

    Raises:
        KeyError: If the ``ELEVENLABS_API`` environment variable is not set.
    """
    XI_API_KEY = os.environ['ELEVENLABS_API']
    VOICE_ID = 'd9MIrwLnvDeH7aZb61E9'  # Replace with your voice ID
    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
    headers = {
        "Accept": "application/json",
        "xi-api-key": XI_API_KEY
    }
    data = {
        "text": str(text),
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 1.0,
            "similarity_boost": 0.0,
            "style": 0.60,  # Adjust style for more romantic tone
            "use_speaker_boost": False
        }
    }

    try:
        # The context manager closes the streamed response (and frees the
        # connection) even if the consumer stops iterating early.
        with requests.post(tts_url, headers=headers, json=data, stream=True,
                           timeout=timeout) as response:
            if not response.ok:
                logging.error(f"Error generating audio: {response.text}")
                return
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:  # skip empty keep-alive chunks
                    yield chunk
    except requests.RequestException as exc:
        # Treat network failures like the HTTP-error path: log and stop.
        logging.error(f"Error generating audio: {exc}")
|
329 |
+
|
330 |
+
def generate_audio_parler_tts_chunks(text, chunk_size=16000):
    """Generate speech for ``text`` with Parler-TTS and yield it as WAV chunks.

    The model and tokenizer are loaded from ``parler-tts/parler_tts_mini_v0.1``
    on every call. CUDA is used when available, falling back to CPU if moving
    the model to the GPU raises an out-of-memory error. The whole waveform is
    generated first and then sliced; this is a generator, so no work happens
    until the first chunk is requested.

    Args:
        text: Prompt text; tokenized and passed as ``prompt_input_ids``.
        chunk_size: Number of audio samples written to each chunk file.

    Yields:
        Path of a temporary ``.wav`` file for each consecutive slice of the
        generated audio. Files are created with ``delete=False`` and are not
        removed here.
    """
    model_id = 'parler-tts/parler_tts_mini_v0.1'
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    try:
        model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
    except torch.cuda.OutOfMemoryError:
        logging.warning("CUDA out of memory. Switching to CPU.")
        device = "cpu"
        model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # The voice description is tokenized into ``input_ids``; the text to speak
    # goes in ``prompt_input_ids``.
    description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."

    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
    prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)

    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids, max_new_tokens=200)
    audio_arr = generation.cpu().numpy().squeeze()

    sampling_rate = model.config.sampling_rate
    for start in range(0, len(audio_arr), chunk_size):
        audio_chunk = audio_arr[start:start + chunk_size]
        # Reserve a unique path, then close the handle before soundfile opens
        # it: reopening a still-open NamedTemporaryFile by name fails on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            chunk_path = f.name
        sf.write(chunk_path, audio_chunk, sampling_rate)
        yield chunk_path
|
355 |
|
356 |
def add_message(history, message):
|
357 |
history.append((message, None))
|
|
|
405 |
api_key = os.environ['SERP_API']
|
406 |
url = f'https://serpapi.com/search.json?engine=google_news&q=birmingham headline&api_key={api_key}'
|
407 |
response = requests.get(url)
|
408 |
+
if response.status_code == 200):
|
409 |
results = response.json().get("news_results", [])
|
410 |
news_html = """
|
411 |
<h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Birmingham Today</h2>
|
|
|
533 |
else:
|
534 |
return gr.update(visible=False), ""
|
535 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
536 |
# Stable Diffusion setup
|
537 |
pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
|
538 |
pipe = pipe.to("cuda")
|