IT2091024v2

Paused

App Files Files Community

Pijush2023 committed on Jul 5, 2024

Commit

c49ac12

verified ·

1 Parent(s): 4743a3f

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -77

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ def install_parler_tts():
 # Call the function to install parler-tts
 install_parler_tts()
 import gradio as gr
 import requests
 import os
@@ -24,7 +25,6 @@ from googlemaps import Client as GoogleMapsClient
 from gtts import gTTS
 from diffusers import StableDiffusion3Pipeline
 import soundfile as sf
-import numpy as np
 from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain_pinecone import PineconeVectorStore
@@ -173,7 +173,7 @@ def fetch_local_weather():
             }}
             .weather-content {{
                 display: flex;
-                align-items: center.
             }}
             .weather-icon {{
                 flex: 1;
@@ -278,84 +278,23 @@ def generate_answer(message, choice):
 def bot(history, choice, tts_model):
     if not history:
         return history
     response, addresses = generate_answer(history[-1][0], choice)
     history[-1][1] = ""
-    # Generate audio and process output prompt in parallel
     with concurrent.futures.ThreadPoolExecutor() as executor:
-        audio_chunks_future = executor.submit(generate_audio_chunks, tts_model, response)
-        text_chunks = [response[i:i + 100] for i in range(0, len(response), 100)]  # Chunk the text for streaming
-        audio_chunks = audio_chunks_future.result()
-        for text_chunk, audio_chunk in zip(text_chunks, audio_chunks):
-            history[-1][1] += text_chunk
-            yield history, audio_chunk
-            time.sleep(0.2)  # Adjust this to synchronize text and audio appearance
-def generate_audio_chunks(tts_model, text):
-    if tts_model == "ElevenLabs":
-        return generate_audio_elevenlabs_chunks(text)
-    else:
-        return generate_audio_parler_tts_chunks(text)
-def generate_audio_elevenlabs_chunks(text):
-    XI_API_KEY = os.environ['ELEVENLABS_API']
-    VOICE_ID = 'd9MIrwLnvDeH7aZb61E9'  # Replace with your voice ID
-    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
-    headers = {
-        "Accept": "application/json",
-        "xi-api-key": XI_API_KEY
-    }
-    data = {
-        "text": str(text),
-        "model_id": "eleven_multilingual_v2",
-        "voice_settings": {
-            "stability": 1.0,
-            "similarity_boost": 0.0,
-            "style": 0.60,  # Adjust style for more romantic tone
-            "use_speaker_boost": False
-        }
-    }
-    response = requests.post(tts_url, headers=headers, json=data, stream=True)
-    audio_chunks = []
-    if response.ok:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
-            for chunk in response.iter_content(chunk_size=1024):
-                f.write(chunk)
-                audio_chunks.append(f.name)
-        return audio_chunks
-    else:
-        logging.error(f"Error generating audio: {response.text}")
-        return []
-def generate_audio_parler_tts_chunks(text):
-    model_id = 'parler-tts/parler_tts_mini_v0.1'
-    device = "cuda:0" if torch.cuda.is_available() else "cpu"
-    try:
-        model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
-    except torch.cuda.OutOfMemoryError:
-        print("CUDA out of memory. Switching to CPU.")
-        device = "cpu"
-        model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
-    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
-    prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
-    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids, max_new_tokens=200)
-    audio_arr = generation.cpu().numpy().squeeze()
-    chunk_size = 16000  # Define a chunk size (adjust as needed)
-    audio_chunks = []
-    for i in range(0, len(audio_arr), chunk_size):
-        audio_chunk = audio_arr[i:i + chunk_size]
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
-            sf.write(f.name, audio_chunk, model.config.sampling_rate)
-            audio_chunks.append(f.name)
-    return audio_chunks
 def add_message(history, message):
     history.append((message, None))
@@ -409,7 +348,7 @@ def fetch_local_news():
     api_key = os.environ['SERP_API']
     url = f'https://serpapi.com/search.json?engine=google_news&q=birmingham headline&api_key={api_key}'
     response = requests.get(url)
-    if response.status_code == 200:
         results = response.json().get("news_results", [])
         news_html = """
         <h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Birmingham Today</h2>
@@ -490,6 +429,7 @@ model_id = 'openai/whisper-large-v3'
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype,
                                                   use_safetensors=True).to(device)
 processor = AutoProcessor.from_pretrained(model_id)
@@ -536,6 +476,62 @@ def show_map_if_details(history,choice):
     else:
         return gr.update(visible=False), ""
 # Stable Diffusion setup
 pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
 pipe = pipe.to("cuda")

 # Call the function to install parler-tts
 install_parler_tts()
 import gradio as gr
 import requests
 import os
 from gtts import gTTS
 from diffusers import StableDiffusion3Pipeline
 import soundfile as sf
 from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain_pinecone import PineconeVectorStore
             }}
             .weather-content {{
                 display: flex;
+                align-items: center;
             }}
             .weather-icon {{
                 flex: 1;
 def bot(history, choice, tts_model):
     if not history:
         return history
     response, addresses = generate_answer(history[-1][0], choice)
     history[-1][1] = ""
+    # Generate audio for the entire response in a separate thread
     with concurrent.futures.ThreadPoolExecutor() as executor:
+        if tts_model == "ElevenLabs":
+            audio_future = executor.submit(generate_audio_elevenlabs, response)
+        else:
+            audio_future = executor.submit(generate_audio_parler_tts, response)
+        for character in response:
+            history[-1][1] += character
+            time.sleep(0.05)  # Adjust the speed of text appearance
+            yield history, None
+        audio_path = audio_future.result()
+        yield history, audio_path
 def add_message(history, message):
     history.append((message, None))
     api_key = os.environ['SERP_API']
     url = f'https://serpapi.com/search.json?engine=google_news&q=birmingham headline&api_key={api_key}'
     response = requests.get(url)
+    if response.status_code == 200:
         results = response.json().get("news_results", [])
         news_html = """
         <h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Birmingham Today</h2>
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype,
+                                                  #low_cpu_mem_usage=True,
                                                   use_safetensors=True).to(device)
 processor = AutoProcessor.from_pretrained(model_id)
     else:
         return gr.update(visible=False), ""
+def generate_audio_elevenlabs(text):
+    XI_API_KEY = os.environ['ELEVENLABS_API']
+    VOICE_ID = 'd9MIrwLnvDeH7aZb61E9'  # Replace with your voice ID
+    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
+    headers = {
+        "Accept": "application/json",
+        "xi-api-key": XI_API_KEY
+    }
+    data = {
+        "text": str(text),
+        "model_id": "eleven_multilingual_v2",
+        "voice_settings": {
+            "stability": 1.0,
+            "similarity_boost": 0.0,
+            "style": 0.60,  # Adjust style for more romantic tone
+            "use_speaker_boost": False
+        }
+    }
+    response = requests.post(tts_url, headers=headers, json=data, stream=True)
+    if response.ok:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
+            for chunk in response.iter_content(chunk_size=1024):
+                f.write(chunk)
+            temp_audio_path = f.name
+        logging.debug(f"Audio saved to {temp_audio_path}")
+        return temp_audio_path
+    else:
+        logging.error(f"Error generating audio: {response.text}")
+        return None
+def generate_audio_parler_tts(text):
+    model_id = 'parler-tts/parler_tts_mini_v0.1'
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    try:
+        model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
+    except torch.cuda.OutOfMemoryError:
+        print("CUDA out of memory. Switching to CPU.")
+        device = "cpu"
+        model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
+    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
+    prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
+    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
+    audio_arr = generation.cpu().numpy().squeeze()
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+        sf.write(f.name, audio_arr, model.config.sampling_rate)
+        temp_audio_path = f.name
+    logging.debug(f"Audio saved to {temp_audio_path}")
+    return temp_audio_path
 # Stable Diffusion setup
 pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
 pipe = pipe.to("cuda")