Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -14,7 +14,6 @@ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
|
|
14 |
from googlemaps import Client as GoogleMapsClient
|
15 |
from gtts import gTTS
|
16 |
from diffusers import StableDiffusion3Pipeline
|
17 |
-
|
18 |
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
19 |
from langchain_pinecone import PineconeVectorStore
|
20 |
from langchain.prompts import PromptTemplate
|
@@ -22,27 +21,21 @@ from langchain.chains import RetrievalQA
|
|
22 |
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
|
23 |
from langchain.agents import Tool, initialize_agent
|
24 |
from huggingface_hub import login
|
|
|
|
|
|
|
25 |
|
26 |
# Check if the token is already set in the environment variables
|
27 |
hf_token = os.getenv("HF_TOKEN")
|
28 |
-
|
29 |
if hf_token is None:
|
30 |
-
# If the token is not set, prompt for it (this should be done securely)
|
31 |
print("Please set your Hugging Face token in the environment variables.")
|
32 |
else:
|
33 |
-
# Login using the token
|
34 |
login(token=hf_token)
|
35 |
|
36 |
-
# Your application logic goes here
|
37 |
-
print("Logged in successfully to Hugging Face Hub!")
|
38 |
-
|
39 |
-
# Set up logging
|
40 |
logging.basicConfig(level=logging.DEBUG)
|
41 |
|
42 |
-
# Initialize OpenAI embeddings
|
43 |
embeddings = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])
|
44 |
|
45 |
-
# Initialize Pinecone
|
46 |
from pinecone import Pinecone
|
47 |
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
|
48 |
|
@@ -50,9 +43,7 @@ index_name = "birmingham-dataset"
|
|
50 |
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
|
51 |
retriever = vectorstore.as_retriever(search_kwargs={'k': 5})
|
52 |
|
53 |
-
|
54 |
-
chat_model = ChatOpenAI(api_key=os.environ['OPENAI_API_KEY'],
|
55 |
-
temperature=0, model='gpt-4o')
|
56 |
|
57 |
conversational_memory = ConversationBufferWindowMemory(
|
58 |
memory_key='chat_history',
|
@@ -64,13 +55,11 @@ def get_current_time_and_date():
|
|
64 |
now = datetime.now()
|
65 |
return now.strftime("%Y-%m-%d %H:%M:%S")
|
66 |
|
67 |
-
# Example usage
|
68 |
current_time_and_date = get_current_time_and_date()
|
69 |
|
70 |
def fetch_local_events():
|
71 |
api_key = os.environ['SERP_API']
|
72 |
url = f'https://serpapi.com/search.json?engine=google_events&q=Events+in+Birmingham&hl=en&gl=us&api_key={api_key}'
|
73 |
-
|
74 |
response = requests.get(url)
|
75 |
if response.status_code == 200:
|
76 |
events_results = response.json().get("events_results", [])
|
@@ -193,11 +182,6 @@ def get_weather_icon(condition):
|
|
193 |
}
|
194 |
return condition_map.get(condition, "c04d")
|
195 |
|
196 |
-
# Update prompt templates to include fetched details
|
197 |
-
|
198 |
-
current_time_and_date = get_current_time_and_date()
|
199 |
-
|
200 |
-
# Define prompt templates
|
201 |
template1 = """You are an expert concierge who is helpful and a renowned guide for Birmingham,Alabama. Based on weather being a sunny bright day and the today's date is 1st july 2024, use the following pieces of context,
|
202 |
memory, and message history, along with your knowledge of perennial events in Birmingham,Alabama, to answer the question at the end. If you don't know the answer, just say "Homie, I need to get more data for this," and don't try to make up an answer.
|
203 |
Use fifteen sentences maximum. Keep the answer as detailed as possible. Always include the address, time, date, and
|
@@ -216,7 +200,6 @@ Helpful Answer:"""
|
|
216 |
QA_CHAIN_PROMPT_1 = PromptTemplate(input_variables=["context", "question"], template=template1)
|
217 |
QA_CHAIN_PROMPT_2 = PromptTemplate(input_variables=["context", "question"], template=template2)
|
218 |
|
219 |
-
# Define the retrieval QA chain
|
220 |
def build_qa_chain(prompt_template):
|
221 |
qa_chain = RetrievalQA.from_chain_type(
|
222 |
llm=chat_model,
|
@@ -233,7 +216,6 @@ def build_qa_chain(prompt_template):
|
|
233 |
]
|
234 |
return qa_chain, tools
|
235 |
|
236 |
-
# Define the agent initializer
|
237 |
def initialize_agent_with_prompt(prompt_template):
|
238 |
qa_chain, tools = build_qa_chain(prompt_template)
|
239 |
agent = initialize_agent(
|
@@ -247,7 +229,6 @@ def initialize_agent_with_prompt(prompt_template):
|
|
247 |
)
|
248 |
return agent
|
249 |
|
250 |
-
# Define the function to generate answers
|
251 |
def generate_answer(message, choice):
|
252 |
logging.debug(f"generate_answer called with prompt_choice: {choice}")
|
253 |
|
@@ -260,23 +241,24 @@ def generate_answer(message, choice):
|
|
260 |
agent = initialize_agent_with_prompt(QA_CHAIN_PROMPT_2)
|
261 |
response = agent(message)
|
262 |
|
263 |
-
# Extract addresses for mapping regardless of the choice
|
264 |
addresses = extract_addresses(response['output'])
|
265 |
return response['output'], addresses
|
266 |
|
267 |
-
def bot(history, choice):
|
268 |
if not history:
|
269 |
return history
|
270 |
response, addresses = generate_answer(history[-1][0], choice)
|
271 |
history[-1][1] = ""
|
272 |
|
273 |
-
# Generate audio for the entire response in a separate thread
|
274 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
275 |
-
|
|
|
|
|
|
|
276 |
|
277 |
for character in response:
|
278 |
history[-1][1] += character
|
279 |
-
time.sleep(0.05)
|
280 |
yield history, None
|
281 |
|
282 |
audio_path = audio_future.result()
|
@@ -293,20 +275,13 @@ def extract_addresses(response):
|
|
293 |
if not isinstance(response, str):
|
294 |
response = str(response)
|
295 |
address_patterns = [
|
296 |
-
# r'([A-Z].*,\sOmaha,\sNE\s\d{5})',
|
297 |
-
# r'(\d{4}\s.*,\sOmaha,\sNE\s\d{5})',
|
298 |
-
# r'([A-Z].*,\sNE\s\d{5})',
|
299 |
-
# r'([A-Z].*,.*\sSt,\sOmaha,\sNE\s\d{5})',
|
300 |
-
# r'([A-Z].*,.*\sStreets,\sOmaha,\sNE\s\d{5})',
|
301 |
-
# r'(\d{2}.*\sStreets)',
|
302 |
-
# r'([A-Z].*\s\d{2},\sOmaha,\sNE\s\d{5})'
|
303 |
r'([A-Z].*,\sBirmingham,\sAL\s\d{5})',
|
304 |
r'(\d{4}\s.*,\sBirmingham,\sAL\s\d{5})',
|
305 |
r'([A-Z].*,\sAL\s\d{5})',
|
306 |
r'([A-Z].*,.*\sSt,\sBirmingham,\sAL\s\d{5})',
|
307 |
r'([A-Z].*,.*\sStreets,\sBirmingham,\sAL\s\d{5})',
|
308 |
r'(\d{2}.*\sStreets)',
|
309 |
-
r'([A-Z].*\s\d{2},\sBirmingham,\sAL\s\d{5})'
|
310 |
r'([a-zA-Z]\s Birmingham)'
|
311 |
]
|
312 |
addresses = []
|
@@ -411,7 +386,6 @@ def fetch_local_news():
|
|
411 |
else:
|
412 |
return "<p>Failed to fetch local news</p>"
|
413 |
|
414 |
-
# Voice Control
|
415 |
import numpy as np
|
416 |
import torch
|
417 |
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
|
@@ -419,18 +393,13 @@ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
|
|
419 |
model_id = 'openai/whisper-large-v3'
|
420 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
421 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
422 |
-
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype
|
423 |
-
#low_cpu_mem_usage=True,
|
424 |
-
use_safetensors=True).to(device)
|
425 |
processor = AutoProcessor.from_pretrained(model_id)
|
426 |
|
427 |
-
# Optimized ASR pipeline
|
428 |
pipe_asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, max_new_tokens=128, chunk_length_s=15, batch_size=16, torch_dtype=torch_dtype, device=device, return_timestamps=True)
|
429 |
|
430 |
base_audio_drive = "/data/audio"
|
431 |
|
432 |
-
import numpy as np
|
433 |
-
|
434 |
def transcribe_function(stream, new_chunk):
|
435 |
try:
|
436 |
sr, y = new_chunk[0], new_chunk[1]
|
@@ -469,7 +438,7 @@ def show_map_if_details(history,choice):
|
|
469 |
|
470 |
def generate_audio_elevenlabs(text):
|
471 |
XI_API_KEY = os.environ['ELEVENLABS_API']
|
472 |
-
VOICE_ID = 'd9MIrwLnvDeH7aZb61E9'
|
473 |
tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
|
474 |
headers = {
|
475 |
"Accept": "application/json",
|
@@ -481,7 +450,7 @@ def generate_audio_elevenlabs(text):
|
|
481 |
"voice_settings": {
|
482 |
"stability": 1.0,
|
483 |
"similarity_boost": 0.0,
|
484 |
-
"style": 0.60,
|
485 |
"use_speaker_boost": False
|
486 |
}
|
487 |
}
|
@@ -497,7 +466,49 @@ def generate_audio_elevenlabs(text):
|
|
497 |
logging.error(f"Error generating audio: {response.text}")
|
498 |
return None
|
499 |
|
500 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
501 |
pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
|
502 |
pipe = pipe.to("cuda")
|
503 |
|
@@ -510,9 +521,8 @@ def generate_image(prompt):
|
|
510 |
).images[0]
|
511 |
return image
|
512 |
|
513 |
-
|
514 |
-
|
515 |
-
hardcoded_prompt_2="A vibrant and dynamic football game scene in the style of Peter Paul Rubens, showcasing the intense match between Alabama and Nebraska. The players are depicted with the dramatic, muscular physiques and expressive faces typical of Rubens' style. The Alabama team is wearing their iconic crimson and white uniforms, while the Nebraska team is in their classic red and white attire. The scene is filled with action, with players in mid-motion, tackling, running, and catching the ball. The background features a grand stadium filled with cheering fans, banners, and the natural landscape in the distance. The colors are rich and vibrant, with a strong use of light and shadow to create depth and drama. The overall atmosphere captures the intensity and excitement of the game, infused with the grandeur and dynamism characteristic of Rubens' work."
|
516 |
hardcoded_prompt_3 = "Create a high-energy scene of a DJ performing on a large stage with vibrant lights, colorful lasers, a lively dancing crowd, and various electronic equipment in the background."
|
517 |
|
518 |
def update_images():
|
@@ -521,10 +531,7 @@ def update_images():
|
|
521 |
image_3 = generate_image(hardcoded_prompt_3)
|
522 |
return image_1, image_2, image_3
|
523 |
|
524 |
-
|
525 |
-
|
526 |
with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
|
527 |
-
|
528 |
with gr.Row():
|
529 |
with gr.Column():
|
530 |
state = gr.State()
|
@@ -535,13 +542,13 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
|
|
535 |
gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
|
536 |
chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
|
537 |
chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
|
538 |
-
|
|
|
539 |
bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
|
540 |
chatbot.like(print_like_dislike, None, None)
|
541 |
clear_button = gr.Button("Clear")
|
542 |
clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
|
543 |
|
544 |
-
|
545 |
audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
|
546 |
audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="SAMLOne_real_time")
|
547 |
|
@@ -552,18 +559,15 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
|
|
552 |
with gr.Column():
|
553 |
weather_output = gr.HTML(value=fetch_local_weather())
|
554 |
news_output = gr.HTML(value=fetch_local_news())
|
555 |
-
|
556 |
|
557 |
with gr.Column():
|
558 |
-
|
559 |
image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
|
560 |
image_output_2 = gr.Image(value=generate_image(hardcoded_prompt_2), width=400, height=400)
|
561 |
image_output_3 = gr.Image(value=generate_image(hardcoded_prompt_3), width=400, height=400)
|
562 |
|
563 |
-
|
564 |
refresh_button = gr.Button("Refresh Images")
|
565 |
refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3])
|
566 |
-
|
567 |
demo.queue()
|
568 |
demo.launch(share=True)
|
569 |
-
|
|
|
14 |
from googlemaps import Client as GoogleMapsClient
|
15 |
from gtts import gTTS
|
16 |
from diffusers import StableDiffusion3Pipeline
|
|
|
17 |
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
18 |
from langchain_pinecone import PineconeVectorStore
|
19 |
from langchain.prompts import PromptTemplate
|
|
|
21 |
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
|
22 |
from langchain.agents import Tool, initialize_agent
|
23 |
from huggingface_hub import login
|
24 |
+
from transformers.models.speecht5.number_normalizer import EnglishNumberNormalizer
|
25 |
+
from parler_tts import ParlerTTSForConditionalGeneration
|
26 |
+
from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
|
27 |
|
28 |
# Check if the token is already set in the environment variables
|
29 |
hf_token = os.getenv("HF_TOKEN")
|
|
|
30 |
if hf_token is None:
|
|
|
31 |
print("Please set your Hugging Face token in the environment variables.")
|
32 |
else:
|
|
|
33 |
login(token=hf_token)
|
34 |
|
|
|
|
|
|
|
|
|
35 |
logging.basicConfig(level=logging.DEBUG)
|
36 |
|
|
|
37 |
embeddings = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])
|
38 |
|
|
|
39 |
from pinecone import Pinecone
|
40 |
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
|
41 |
|
|
|
43 |
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
|
44 |
retriever = vectorstore.as_retriever(search_kwargs={'k': 5})
|
45 |
|
46 |
+
chat_model = ChatOpenAI(api_key=os.environ['OPENAI_API_KEY'], temperature=0, model='gpt-4o')
|
|
|
|
|
47 |
|
48 |
conversational_memory = ConversationBufferWindowMemory(
|
49 |
memory_key='chat_history',
|
|
|
55 |
now = datetime.now()
|
56 |
return now.strftime("%Y-%m-%d %H:%M:%S")
|
57 |
|
|
|
58 |
current_time_and_date = get_current_time_and_date()
|
59 |
|
60 |
def fetch_local_events():
|
61 |
api_key = os.environ['SERP_API']
|
62 |
url = f'https://serpapi.com/search.json?engine=google_events&q=Events+in+Birmingham&hl=en&gl=us&api_key={api_key}'
|
|
|
63 |
response = requests.get(url)
|
64 |
if response.status_code == 200:
|
65 |
events_results = response.json().get("events_results", [])
|
|
|
182 |
}
|
183 |
return condition_map.get(condition, "c04d")
|
184 |
|
|
|
|
|
|
|
|
|
|
|
185 |
template1 = """You are an expert concierge who is helpful and a renowned guide for Birmingham,Alabama. Based on weather being a sunny bright day and the today's date is 1st july 2024, use the following pieces of context,
|
186 |
memory, and message history, along with your knowledge of perennial events in Birmingham,Alabama, to answer the question at the end. If you don't know the answer, just say "Homie, I need to get more data for this," and don't try to make up an answer.
|
187 |
Use fifteen sentences maximum. Keep the answer as detailed as possible. Always include the address, time, date, and
|
|
|
200 |
QA_CHAIN_PROMPT_1 = PromptTemplate(input_variables=["context", "question"], template=template1)
|
201 |
QA_CHAIN_PROMPT_2 = PromptTemplate(input_variables=["context", "question"], template=template2)
|
202 |
|
|
|
203 |
def build_qa_chain(prompt_template):
|
204 |
qa_chain = RetrievalQA.from_chain_type(
|
205 |
llm=chat_model,
|
|
|
216 |
]
|
217 |
return qa_chain, tools
|
218 |
|
|
|
219 |
def initialize_agent_with_prompt(prompt_template):
|
220 |
qa_chain, tools = build_qa_chain(prompt_template)
|
221 |
agent = initialize_agent(
|
|
|
229 |
)
|
230 |
return agent
|
231 |
|
|
|
232 |
def generate_answer(message, choice):
|
233 |
logging.debug(f"generate_answer called with prompt_choice: {choice}")
|
234 |
|
|
|
241 |
agent = initialize_agent_with_prompt(QA_CHAIN_PROMPT_2)
|
242 |
response = agent(message)
|
243 |
|
|
|
244 |
addresses = extract_addresses(response['output'])
|
245 |
return response['output'], addresses
|
246 |
|
247 |
+
def bot(history, choice, tts_choice):
|
248 |
if not history:
|
249 |
return history
|
250 |
response, addresses = generate_answer(history[-1][0], choice)
|
251 |
history[-1][1] = ""
|
252 |
|
|
|
253 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
254 |
+
if tts_choice == "Eleven Labs":
|
255 |
+
audio_future = executor.submit(generate_audio_elevenlabs, response)
|
256 |
+
elif tts_choice == "Parler-TTS":
|
257 |
+
audio_future = executor.submit(generate_audio_parler_tts, response)
|
258 |
|
259 |
for character in response:
|
260 |
history[-1][1] += character
|
261 |
+
time.sleep(0.05)
|
262 |
yield history, None
|
263 |
|
264 |
audio_path = audio_future.result()
|
|
|
275 |
if not isinstance(response, str):
|
276 |
response = str(response)
|
277 |
address_patterns = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
278 |
r'([A-Z].*,\sBirmingham,\sAL\s\d{5})',
|
279 |
r'(\d{4}\s.*,\sBirmingham,\sAL\s\d{5})',
|
280 |
r'([A-Z].*,\sAL\s\d{5})',
|
281 |
r'([A-Z].*,.*\sSt,\sBirmingham,\sAL\s\d{5})',
|
282 |
r'([A-Z].*,.*\sStreets,\sBirmingham,\sAL\s\d{5})',
|
283 |
r'(\d{2}.*\sStreets)',
|
284 |
+
r'([A-Z].*\s\d{2},\sBirmingham,\sAL\s\d{5})',
|
285 |
r'([a-zA-Z]\s Birmingham)'
|
286 |
]
|
287 |
addresses = []
|
|
|
386 |
else:
|
387 |
return "<p>Failed to fetch local news</p>"
|
388 |
|
|
|
389 |
import numpy as np
|
390 |
import torch
|
391 |
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
|
|
|
393 |
model_id = 'openai/whisper-large-v3'
|
394 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
395 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
396 |
+
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
|
|
|
|
|
397 |
processor = AutoProcessor.from_pretrained(model_id)
|
398 |
|
|
|
399 |
pipe_asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, max_new_tokens=128, chunk_length_s=15, batch_size=16, torch_dtype=torch_dtype, device=device, return_timestamps=True)
|
400 |
|
401 |
base_audio_drive = "/data/audio"
|
402 |
|
|
|
|
|
403 |
def transcribe_function(stream, new_chunk):
|
404 |
try:
|
405 |
sr, y = new_chunk[0], new_chunk[1]
|
|
|
438 |
|
439 |
def generate_audio_elevenlabs(text):
|
440 |
XI_API_KEY = os.environ['ELEVENLABS_API']
|
441 |
+
VOICE_ID = 'd9MIrwLnvDeH7aZb61E9'
|
442 |
tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
|
443 |
headers = {
|
444 |
"Accept": "application/json",
|
|
|
450 |
"voice_settings": {
|
451 |
"stability": 1.0,
|
452 |
"similarity_boost": 0.0,
|
453 |
+
"style": 0.60,
|
454 |
"use_speaker_boost": False
|
455 |
}
|
456 |
}
|
|
|
466 |
logging.error(f"Error generating audio: {response.text}")
|
467 |
return None
|
468 |
|
469 |
+
repo_id = "parler-tts/parler-tts-mini-expresso"
|
470 |
+
|
471 |
+
parler_model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
|
472 |
+
parler_tokenizer = AutoTokenizer.from_pretrained(repo_id)
|
473 |
+
parler_feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)
|
474 |
+
|
475 |
+
SAMPLE_RATE = parler_feature_extractor.sampling_rate
|
476 |
+
SEED = 42
|
477 |
+
|
478 |
+
def preprocess(text):
    """Normalize text before it is tokenized as the Parler-TTS prompt.

    Steps, in order:
      1. Run the text through ``EnglishNumberNormalizer`` and strip
         surrounding whitespace.
      2. Append a full stop when the text does not already end with a
         punctuation character.
      3. Rewrite upper-case abbreviations as space-separated letters with
         any dots removed (``"USA"`` -> ``"U S A"``).

    Args:
        text: Raw text to be spoken.

    Returns:
        The normalized string. If nothing is left after normalization and
        stripping, the empty string is returned unchanged.
    """
    # Imported locally so the function does not depend on a module-level
    # `from string import punctuation`, which is not visible in this file view.
    from string import punctuation

    text = EnglishNumberNormalizer()(text).strip()

    # Guard: indexing text[-1] on an empty string would raise IndexError.
    if not text:
        return text

    if text[-1] not in punctuation:
        text = f"{text}."

    def separate_abb(match):
        # "U.S.A" -> "USA" -> "U S A"
        return " ".join(match.group(0).replace(".", ""))

    # A single regex pass rewrites each abbreviation exactly where it was
    # matched. The previous findall + str.replace loop did substring
    # replacement, so a short abbreviation ("US") could mangle a longer one
    # ("USA" -> "U SA") or text inside ordinary words.
    return re.sub(r'\b[A-Z][A-Z\.]+\b', separate_abb, text)
|
495 |
+
|
496 |
+
def generate_audio_parler_tts(text):
    """Synthesize ``text`` with the Parler-TTS model and save it as a WAV file.

    The fixed voice description and the preprocessed ``text`` are tokenized
    with ``parler_tokenizer``, the generation seed is reset to ``SEED``, and
    the waveform returned by ``parler_model.generate`` is written to a
    temporary ``.wav`` file.

    Args:
        text: The text to be spoken; it is passed through ``preprocess`` first.

    Returns:
        Path of the temporary WAV file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.
    """
    # Local import: `wave` is stdlib and is not known to be imported at file level.
    import wave

    description = "Thomas speaks with emphasis and excitement at a moderate pace with high quality."
    inputs = parler_tokenizer(description, return_tensors="pt").to(device)
    prompt = parler_tokenizer(preprocess(text), return_tensors="pt").to(device)

    set_seed(SEED)
    generation = parler_model.generate(input_ids=inputs.input_ids, prompt_input_ids=prompt.input_ids)
    audio_arr = generation.cpu().numpy().squeeze()

    # Convert to 16-bit PCM. Values are clipped first so out-of-range samples
    # saturate instead of wrapping around.
    # NOTE(review): assumes the model emits a mono float waveform in [-1, 1] — confirm.
    pcm16 = (np.clip(audio_arr.astype(np.float32), -1.0, 1.0) * 32767.0).astype("<i2")

    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        # Dumping the raw array bytes into a ".wav" file produces no RIFF/WAVE
        # header, so audio players cannot decode it. The wave module writes
        # the header along with the sample frames.
        with wave.open(f, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
            wav_file.setframerate(SAMPLE_RATE)
            wav_file.writeframes(pcm16.tobytes())
        temp_audio_path = f.name

    logging.debug(f"Audio saved to {temp_audio_path}")
    return temp_audio_path
|
511 |
+
|
512 |
pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
|
513 |
pipe = pipe.to("cuda")
|
514 |
|
|
|
521 |
).images[0]
|
522 |
return image
|
523 |
|
524 |
+
hardcoded_prompt_1 = "Give a high quality photograph of a great looking red 2026 Bentley coupe against a skyline setting in the night, michael mann style in omaha enticing the consumer to buy this product"
|
525 |
+
hardcoded_prompt_2 = "A vibrant and dynamic football game scene in the style of Peter Paul Rubens, showcasing the intense match between Alabama and Nebraska. The players are depicted with the dramatic, muscular physiques and expressive faces typical of Rubens' style. The Alabama team is wearing their iconic crimson and white uniforms, while the Nebraska team is in their classic red and white attire. The scene is filled with action, with players in mid-motion, tackling, running, and catching the ball. The background features a grand stadium filled with cheering fans, banners, and the natural landscape in the distance. The colors are rich and vibrant, with a strong use of light and shadow to create depth and drama. The overall atmosphere captures the intensity and excitement of the game, infused with the grandeur and dynamism characteristic of Rubens' work."
|
|
|
526 |
hardcoded_prompt_3 = "Create a high-energy scene of a DJ performing on a large stage with vibrant lights, colorful lasers, a lively dancing crowd, and various electronic equipment in the background."
|
527 |
|
528 |
def update_images():
|
|
|
531 |
image_3 = generate_image(hardcoded_prompt_3)
|
532 |
return image_1, image_2, image_3
|
533 |
|
|
|
|
|
534 |
with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
|
|
|
535 |
with gr.Row():
|
536 |
with gr.Column():
|
537 |
state = gr.State()
|
|
|
542 |
gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
|
543 |
chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
|
544 |
chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
|
545 |
+
tts_choice = gr.Radio(label="Select TTS System", choices=["Eleven Labs", "Parler-TTS"], value="Eleven Labs")
|
546 |
+
bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
|
547 |
bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
|
548 |
chatbot.like(print_like_dislike, None, None)
|
549 |
clear_button = gr.Button("Clear")
|
550 |
clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
|
551 |
|
|
|
552 |
audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
|
553 |
audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="SAMLOne_real_time")
|
554 |
|
|
|
559 |
with gr.Column():
|
560 |
weather_output = gr.HTML(value=fetch_local_weather())
|
561 |
news_output = gr.HTML(value=fetch_local_news())
|
562 |
+
events_output = gr.HTML(value=fetch_local_events())
|
563 |
|
564 |
with gr.Column():
|
|
|
565 |
image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
|
566 |
image_output_2 = gr.Image(value=generate_image(hardcoded_prompt_2), width=400, height=400)
|
567 |
image_output_3 = gr.Image(value=generate_image(hardcoded_prompt_3), width=400, height=400)
|
568 |
|
|
|
569 |
refresh_button = gr.Button("Refresh Images")
|
570 |
refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3])
|
571 |
+
|
572 |
demo.queue()
|
573 |
demo.launch(share=True)
|
|