Pijush2023 committed on
Commit 37755ba · verified · 1 Parent(s): c186900

Update app.py

Files changed (1)
  1. app.py +21 -774
app.py CHANGED
@@ -1,608 +1,3 @@
1
- # import subprocess
2
- # import sys
3
-
4
- # def install_parler_tts():
5
- # subprocess.check_call([sys.executable, "-m", "pip", "install", "git+https://github.com/huggingface/parler-tts.git"])
6
-
7
- # # Call the function to install parler-tts
8
- # install_parler_tts()
9
-
10
-
11
- # import gradio as gr
12
- # import requests
13
- # import os
14
- # import time
15
- # import re
16
- # import logging
17
- # import tempfile
18
- # import folium
19
- # import concurrent.futures
20
- # import torch
21
- # from PIL import Image
22
- # from datetime import datetime
23
- # from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
24
- # from googlemaps import Client as GoogleMapsClient
25
- # from gtts import gTTS
26
- # from diffusers import StableDiffusion3Pipeline
27
- # import soundfile as sf
28
-
29
- # from langchain_openai import OpenAIEmbeddings, ChatOpenAI
30
- # from langchain_pinecone import PineconeVectorStore
31
- # from langchain.prompts import PromptTemplate
32
- # from langchain.chains import RetrievalQA
33
- # from langchain.chains.conversation.memory import ConversationBufferWindowMemory
34
- # from langchain.agents import Tool, initialize_agent
35
- # from huggingface_hub import login
36
-
37
- # # Check if the token is already set in the environment variables
38
- # hf_token = os.getenv("HF_TOKEN")
39
-
40
- # if hf_token is None:
41
- # # If the token is not set, prompt for it (this should be done securely)
42
- # print("Please set your Hugging Face token in the environment variables.")
43
- # else:
44
- # # Login using the token
45
- # login(token=hf_token)
46
-
47
- # # Your application logic goes here
48
- # print("Logged in successfully to Hugging Face Hub!")
49
-
50
- # # Set up logging
51
- # logging.basicConfig(level=logging.DEBUG)
52
-
53
- # # Initialize OpenAI embeddings
54
- # embeddings = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])
55
-
56
- # # Initialize Pinecone
57
- # from pinecone import Pinecone
58
- # pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
59
-
60
- # index_name = "birmingham-dataset"
61
- # vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
62
- # retriever = vectorstore.as_retriever(search_kwargs={'k': 5})
63
-
64
- # # Initialize ChatOpenAI model
65
- # chat_model = ChatOpenAI(api_key=os.environ['OPENAI_API_KEY'],
66
- # temperature=0, model='gpt-4o')
67
-
68
- # conversational_memory = ConversationBufferWindowMemory(
69
- # memory_key='chat_history',
70
- # k=10,
71
- # return_messages=True
72
- # )
73
-
74
- # def get_current_time_and_date():
75
- # now = datetime.now()
76
- # return now.strftime("%Y-%m-%d %H:%M:%S")
77
-
78
- # # Example usage
79
- # current_time_and_date = get_current_time_and_date()
80
-
81
- # def fetch_local_events():
82
- # api_key = os.environ['SERP_API']
83
- # url = f'https://serpapi.com/search.json?engine=google_events&q=Events+in+Birmingham&hl=en&gl=us&api_key={api_key}'
84
-
85
- # response = requests.get(url)
86
- # if response.status_code == 200:
87
- # events_results = response.json().get("events_results", [])
88
- # events_html = """
89
- # <h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Local Events</h2>
90
- # <style>
91
- # .event-item {
92
- # font-family: 'Verdana', sans-serif;
93
- # color: #333;
94
- # margin-bottom: 15px;
95
- # padding: 10px;
96
- # font-weight: bold;
97
- # }
98
- # .event-item a {
99
- # color: #1E90FF;
100
- # text-decoration: none;
101
- # }
102
- # .event-item a:hover {
103
- # text-decoration: underline;
104
- # }
105
- # </style>
106
- # """
107
- # for index, event in enumerate(events_results):
108
- # title = event.get("title", "No title")
109
- # date = event.get("date", "No date")
110
- # location = event.get("address", "No location")
111
- # link = event.get("link", "#")
112
- # events_html += f"""
113
- # <div class="event-item">
114
- # <a href='{link}' target='_blank'>{index + 1}. {title}</a>
115
- # <p>Date: {date}<br>Location: {location}</p>
116
- # </div>
117
- # """
118
- # return events_html
119
- # else:
120
- # return "<p>Failed to fetch local events</p>"
121
-
122
- # def fetch_local_weather():
123
- # try:
124
- # api_key = os.environ['WEATHER_API']
125
- # url = f'https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/birmingham?unitGroup=metric&include=events%2Calerts%2Chours%2Cdays%2Ccurrent&key={api_key}'
126
- # response = requests.get(url)
127
- # response.raise_for_status()
128
- # jsonData = response.json()
129
-
130
- # current_conditions = jsonData.get("currentConditions", {})
131
- # temp_celsius = current_conditions.get("temp", "N/A")
132
-
133
- # if temp_celsius != "N/A":
134
- # temp_fahrenheit = int((temp_celsius * 9/5) + 32)
135
- # else:
136
- # temp_fahrenheit = "N/A"
137
-
138
- # condition = current_conditions.get("conditions", "N/A")
139
- # humidity = current_conditions.get("humidity", "N/A")
140
-
141
- # weather_html = f"""
142
- # <div class="weather-theme">
143
- # <h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Local Weather</h2>
144
- # <div class="weather-content">
145
- # <div class="weather-icon">
146
- # <img src="https://www.weatherbit.io/static/img/icons/{get_weather_icon(condition)}.png" alt="{condition}" style="width: 100px; height: 100px;">
147
- # </div>
148
- # <div class="weather-details">
149
- # <p style="font-family: 'Verdana', sans-serif; color: #333; font-size: 1.2em;">Temperature: {temp_fahrenheit}°F</p>
150
- # <p style="font-family: 'Verdana', sans-serif; color: #333; font-size: 1.2em;">Condition: {condition}</p>
151
- # <p style="font-family: 'Verdana', sans-serif; color: #333; font-size: 1.2em;">Humidity: {humidity}%</p>
152
- # </div>
153
- # </div>
154
- # </div>
155
- # <style>
156
- # .weather-theme {{
157
- # animation: backgroundAnimation 10s infinite alternate;
158
- # border-radius: 10px;
159
- # padding: 10px;
160
- # margin-bottom: 15px;
161
- # background: linear-gradient(45deg, #ffcc33, #ff6666, #ffcc33, #ff6666);
162
- # background-size: 400% 400%;
163
- # box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
164
- # transition: box-shadow 0.3s ease, background-color 0.3s ease;
165
- # }}
166
- # .weather-theme:hover {{
167
- # box-shadow: 0 8px 16px rgba(0, 0, 0, 0.2);
168
- # background-position: 100% 100%;
169
- # }}
170
- # @keyframes backgroundAnimation {{
171
- # 0% {{ background-position: 0% 50%; }}
172
- # 100% {{ background-position: 100% 50%; }}
173
- # }}
174
- # .weather-content {{
175
- # display: flex;
176
- # align-items: center;
177
- # }}
178
- # .weather-icon {{
179
- # flex: 1;
180
- # }}
181
- # .weather-details {{
182
- # flex: 3;
183
- # }}
184
- # </style>
185
- # """
186
- # return weather_html
187
- # except requests.exceptions.RequestException as e:
188
- # return f"<p>Failed to fetch local weather: {e}</p>"
189
-
190
- # def get_weather_icon(condition):
191
- # condition_map = {
192
- # "Clear": "c01d",
193
- # "Partly Cloudy": "c02d",
194
- # "Cloudy": "c03d",
195
- # "Overcast": "c04d",
196
- # "Mist": "a01d",
197
- # "Patchy rain possible": "r01d",
198
- # "Light rain": "r02d",
199
- # "Moderate rain": "r03d",
200
- # "Heavy rain": "r04d",
201
- # "Snow": "s01d",
202
- # "Thunderstorm": "t01d",
203
- # "Fog": "a05d",
204
- # }
205
- # return condition_map.get(condition, "c04d")
206
-
207
- # # Update prompt templates to include fetched details
208
-
209
- # current_time_and_date = get_current_time_and_date()
210
-
211
- # # Define prompt templates
212
- # template1 = """You are an expert concierge who is helpful and a renowned guide for Birmingham,Alabama. Based on weather being a sunny bright day and the today's date is 1st july 2024, use the following pieces of context,
213
- # memory, and message history, along with your knowledge of perennial events in Birmingham,Alabama, to answer the question at the end. If you don't know the answer, just say "Homie, I need to get more data for this," and don't try to make up an answer.
214
- # Use fifteen sentences maximum. Keep the answer as detailed as possible. Always include the address, time, date, and
215
- # event type and description. Always say "It was my pleasure!" at the end of the answer.
216
- # {context}
217
- # Question: {question}
218
- # Helpful Answer:"""
219
-
220
- # template2 = """You are an expert concierge who is helpful and a renowned guide for Birmingham,Alabama. Based on today's weather being a sunny bright day and today's date is 1st july 2024, take the location or address but don't show the location or address on the output prompts. Use the following pieces of context,
221
- # memory, and message history, along with your knowledge of perennial events in Birmingham,Alabama, to answer the question at the end. If you don't know the answer, just say "Homie, I need to get more data for this," and don't try to make up an answer.
222
- # Keep the answer short and sweet and crisp. Always say "It was my pleasure!" at the end of the answer.
223
- # {context}
224
- # Question: {question}
225
- # Helpful Answer:"""
226
-
227
- # QA_CHAIN_PROMPT_1 = PromptTemplate(input_variables=["context", "question"], template=template1)
228
- # QA_CHAIN_PROMPT_2 = PromptTemplate(input_variables=["context", "question"], template=template2)
229
-
230
- # # Define the retrieval QA chain
231
- # def build_qa_chain(prompt_template):
232
- # qa_chain = RetrievalQA.from_chain_type(
233
- # llm=chat_model,
234
- # chain_type="stuff",
235
- # retriever=retriever,
236
- # chain_type_kwargs={"prompt": prompt_template}
237
- # )
238
- # tools = [
239
- # Tool(
240
- # name='Knowledge Base',
241
- # func=qa_chain,
242
- # description='Use this tool when answering general knowledge queries to get more information about the topic'
243
- # )
244
- # ]
245
- # return qa_chain, tools
246
-
247
- # # Define the agent initializer
248
- # def initialize_agent_with_prompt(prompt_template):
249
- # qa_chain, tools = build_qa_chain(prompt_template)
250
- # agent = initialize_agent(
251
- # agent='chat-conversational-react-description',
252
- # tools=tools,
253
- # llm=chat_model,
254
- # verbose=False,
255
- # max_iteration=5,
256
- # early_stopping_method='generate',
257
- # memory=conversational_memory
258
- # )
259
- # return agent
260
-
261
- # # Define the function to generate answers
262
- # def generate_answer(message, choice):
263
- # logging.debug(f"generate_answer called with prompt_choice: {choice}")
264
-
265
- # if choice == "Details":
266
- # agent = initialize_agent_with_prompt(QA_CHAIN_PROMPT_1)
267
- # elif choice == "Conversational":
268
- # agent = initialize_agent_with_prompt(QA_CHAIN_PROMPT_2)
269
- # else:
270
- # logging.error(f"Invalid prompt_choice: {choice}. Defaulting to 'Conversational'")
271
- # agent = initialize_agent_with_prompt(QA_CHAIN_PROMPT_2)
272
- # response = agent(message)
273
-
274
- # # Extract addresses for mapping regardless of the choice
275
- # addresses = extract_addresses(response['output'])
276
- # return response['output'], addresses
277
-
278
- # def bot(history, choice, tts_model):
279
- # if not history:
280
- # return history
281
- # response, addresses = generate_answer(history[-1][0], choice)
282
- # history[-1][1] = ""
283
-
284
- # # Generate audio for the entire response in a separate thread
285
- # with concurrent.futures.ThreadPoolExecutor() as executor:
286
- # if tts_model == "ElevenLabs":
287
- # audio_future = executor.submit(generate_audio_elevenlabs, response)
288
- # else:
289
- # audio_future = executor.submit(generate_audio_parler_tts, response)
290
-
291
- # for character in response:
292
- # history[-1][1] += character
293
- # time.sleep(0.05) # Adjust the speed of text appearance
294
- # yield history, None
295
-
296
- # audio_path = audio_future.result()
297
- # yield history, audio_path
298
-
299
-
300
- # def add_message(history, message):
301
- # history.append((message, None))
302
- # return history, gr.Textbox(value="", interactive=True, placeholder="Enter message or upload file...", show_label=False)
303
-
304
- # def print_like_dislike(x: gr.LikeData):
305
- # print(x.index, x.value, x.liked)
306
-
307
- # def extract_addresses(response):
308
- # if not isinstance(response, str):
309
- # response = str(response)
310
- # address_patterns = [
311
- # r'([A-Z].*,\sBirmingham,\sAL\s\d{5})',
312
- # r'(\d{4}\s.*,\sBirmingham,\sAL\s\d{5})',
313
- # r'([A-Z].*,\sAL\s\d{5})',
314
- # r'([A-Z].*,.*\sSt,\sBirmingham,\sAL\s\d{5})',
315
- # r'([A-Z].*,.*\sStreets,\sBirmingham,\sAL\s\d{5})',
316
- # r'(\d{2}.*\sStreets)',
317
- # r'([A-Z].*\s\d{2},\sBirmingham,\sAL\s\d{5})'
318
- # r'([a-zA-Z]\s Birmingham)'
319
- # ]
320
- # addresses = []
321
- # for pattern in address_patterns:
322
- # addresses.extend(re.findall(pattern, response))
323
- # return addresses
324
-
325
- # all_addresses = []
326
-
327
- # def generate_map(location_names):
328
- # global all_addresses
329
- # all_addresses.extend(location_names)
330
-
331
- # api_key = os.environ['GOOGLEMAPS_API_KEY']
332
- # gmaps = GoogleMapsClient(key=api_key)
333
-
334
- # m = folium.Map(location=[33.5175,-86.809444], zoom_start=16)
335
-
336
- # for location_name in all_addresses:
337
- # geocode_result = gmaps.geocode(location_name)
338
- # if geocode_result:
339
- # location = geocode_result[0]['geometry']['location']
340
- # folium.Marker(
341
- # [location['lat'], location['lng']],
342
- # tooltip=f"{geocode_result[0]['formatted_address']}"
343
- # ).add_to(m)
344
-
345
- # map_html = m._repr_html_()
346
- # return map_html
347
-
348
- # def fetch_local_news():
349
- # api_key = os.environ['SERP_API']
350
- # url = f'https://serpapi.com/search.json?engine=google_news&q=birmingham headline&api_key={api_key}'
351
- # response = requests.get(url)
352
- # if response.status_code == 200:
353
- # results = response.json().get("news_results", [])
354
- # news_html = """
355
- # <h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Birmingham Today</h2>
356
- # <style>
357
- # .news-item {
358
- # font-family: 'Verdana', sans-serif;
359
- # color: #333;
360
- # background-color: #f0f8ff;
361
- # margin-bottom: 15px;
362
- # padding: 10px;
363
- # border-radius: 5px;
364
- # transition: box-shadow 0.3s ease, background-color 0.3s ease;
365
- # font-weight: bold;
366
- # }
367
- # .news-item:hover {
368
- # box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
369
- # background-color: #e6f7ff;
370
- # }
371
- # .news-item a {
372
- # color: #1E90FF;
373
- # text-decoration: none;
374
- # font-weight: bold;
375
- # }
376
- # .news-item a:hover {
377
- # text-decoration: underline;
378
- # }
379
- # .news-preview {
380
- # position: absolute;
381
- # display: none;
382
- # border: 1px solid #ccc;
383
- # border-radius: 5px;
384
- # box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
385
- # background-color: white;
386
- # z-index: 1000;
387
- # max-width: 300px;
388
- # padding: 10px;
389
- # font-family: 'Verdana', sans-serif;
390
- # color: #333;
391
- # }
392
- # </style>
393
- # <script>
394
- # function showPreview(event, previewContent) {
395
- # var previewBox = document.getElementById('news-preview');
396
- # previewBox.innerHTML = previewContent;
397
- # previewBox.style.left = event.pageX + 'px';
398
- # previewBox.style.top = event.pageY + 'px';
399
- # previewBox.style.display = 'block';
400
- # }
401
- # function hidePreview() {
402
- # var previewBox = document.getElementById('news-preview');
403
- # previewBox.style.display = 'none';
404
- # }
405
- # </script>
406
- # <div id="news-preview" class="news-preview"></div>
407
- # """
408
- # for index, result in enumerate(results[:7]):
409
- # title = result.get("title", "No title")
410
- # link = result.get("link", "#")
411
- # snippet = result.get("snippet", "")
412
- # news_html += f"""
413
- # <div class="news-item" onmouseover="showPreview(event, '{snippet}')" onmouseout="hidePreview()">
414
- # <a href='{link}' target='_blank'>{index + 1}. {title}</a>
415
- # <p>{snippet}</p>
416
- # </div>
417
- # """
418
- # return news_html
419
- # else:
420
- # return "<p>Failed to fetch local news</p>"
421
-
422
- # # Voice Control
423
- # import numpy as np
424
- # import torch
425
- # from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
426
- # from parler_tts import ParlerTTSForConditionalGeneration
427
- # from transformers import AutoTokenizer
428
-
429
- # model_id = 'openai/whisper-large-v3'
430
- # device = "cuda:0" if torch.cuda.is_available() else "cpu"
431
- # torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
432
- # model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype,
433
- # #low_cpu_mem_usage=True,
434
- # use_safetensors=True).to(device)
435
- # processor = AutoProcessor.from_pretrained(model_id)
436
-
437
- # # Optimized ASR pipeline
438
- # pipe_asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, max_new_tokens=128, chunk_length_s=15, batch_size=16, torch_dtype=torch_dtype, device=device, return_timestamps=True)
439
-
440
- # base_audio_drive = "/data/audio"
441
-
442
- # import numpy as np
443
-
444
- # def transcribe_function(stream, new_chunk):
445
- # try:
446
- # sr, y = new_chunk[0], new_chunk[1]
447
- # except TypeError:
448
- # print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
449
- # return stream, "", None
450
-
451
- # y = y.astype(np.float32) / np.max(np.abs(y))
452
-
453
- # if stream is not None:
454
- # stream = np.concatenate([stream, y])
455
- # else:
456
- # stream = y
457
-
458
- # result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
459
-
460
- # full_text = result.get("text", "")
461
-
462
- # return stream, full_text, result
463
-
464
- # def update_map_with_response(history):
465
- # if not history:
466
- # return ""
467
- # response = history[-1][1]
468
- # addresses = extract_addresses(response)
469
- # return generate_map(addresses)
470
-
471
- # def clear_textbox():
472
- # return ""
473
-
474
- # def show_map_if_details(history,choice):
475
- # if choice in ["Details", "Conversational"]:
476
- # return gr.update(visible=True), update_map_with_response(history)
477
- # else:
478
- # return gr.update(visible=False), ""
479
-
480
- # def generate_audio_elevenlabs(text):
481
- # XI_API_KEY = os.environ['ELEVENLABS_API']
482
- # VOICE_ID = 'd9MIrwLnvDeH7aZb61E9' # Replace with your voice ID
483
- # tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
484
- # headers = {
485
- # "Accept": "application/json",
486
- # "xi-api-key": XI_API_KEY
487
- # }
488
- # data = {
489
- # "text": str(text),
490
- # "model_id": "eleven_multilingual_v2",
491
- # "voice_settings": {
492
- # "stability": 1.0,
493
- # "similarity_boost": 0.0,
494
- # "style": 0.60, # Adjust style for more romantic tone
495
- # "use_speaker_boost": False
496
- # }
497
- # }
498
- # response = requests.post(tts_url, headers=headers, json=data, stream=True)
499
- # if response.ok:
500
- # with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
501
- # for chunk in response.iter_content(chunk_size=1024):
502
- # f.write(chunk)
503
- # temp_audio_path = f.name
504
- # logging.debug(f"Audio saved to {temp_audio_path}")
505
- # return temp_audio_path
506
- # else:
507
- # logging.error(f"Error generating audio: {response.text}")
508
- # return None
509
-
510
- # def generate_audio_parler_tts(text):
511
- # model_id = 'parler-tts/parler_tts_mini_v0.1'
512
- # device = "cuda:0" if torch.cuda.is_available() else "cpu"
513
- # try:
514
- # model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
515
- # except torch.cuda.OutOfMemoryError:
516
- # print("CUDA out of memory. Switching to CPU.")
517
- # device = "cpu"
518
- # model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
519
- # tokenizer = AutoTokenizer.from_pretrained(model_id)
520
-
521
- # description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
522
-
523
- # input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
524
- # prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
525
-
526
- # generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
527
- # audio_arr = generation.cpu().numpy().squeeze()
528
-
529
- # with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
530
- # sf.write(f.name, audio_arr, model.config.sampling_rate)
531
- # temp_audio_path = f.name
532
-
533
- # logging.debug(f"Audio saved to {temp_audio_path}")
534
- # return temp_audio_path
535
-
536
- # # Stable Diffusion setup
537
- # pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
538
- # pipe = pipe.to("cuda")
539
-
540
- # def generate_image(prompt):
541
- # image = pipe(
542
- # prompt,
543
- # negative_prompt="",
544
- # num_inference_steps=28,
545
- # guidance_scale=3.0,
546
- # ).images[0]
547
- # return image
548
-
549
- # # Hardcoded prompt for image generation
550
- # hardcoded_prompt_1="Give a high quality photograph of a great looking red 2026 Bentley coupe against a skyline setting in th night, michael mann style in omaha enticing the consumer to buy this product"
551
- # hardcoded_prompt_2="A vibrant and dynamic football game scene in the style of Peter Paul Rubens, showcasing the intense match between Alabama and Nebraska. The players are depicted with the dramatic, muscular physiques and expressive faces typical of Rubens' style. The Alabama team is wearing their iconic crimson and white uniforms, while the Nebraska team is in their classic red and white attire. The scene is filled with action, with players in mid-motion, tackling, running, and catching the ball. The background features a grand stadium filled with cheering fans, banners, and the natural landscape in the distance. The colors are rich and vibrant, with a strong use of light and shadow to create depth and drama. The overall atmosphere captures the intensity and excitement of the game, infused with the grandeur and dynamism characteristic of Rubens' work."
552
- # hardcoded_prompt_3 = "Create a high-energy scene of a DJ performing on a large stage with vibrant lights, colorful lasers, a lively dancing crowd, and various electronic equipment in the background."
553
-
554
- # def update_images():
555
- # image_1 = generate_image(hardcoded_prompt_1)
556
- # image_2 = generate_image(hardcoded_prompt_2)
557
- # image_3 = generate_image(hardcoded_prompt_3)
558
- # return image_1, image_2, image_3
559
-
560
- # with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
561
-
562
- # with gr.Row():
563
- # with gr.Column():
564
- # state = gr.State()
565
-
566
- # chatbot = gr.Chatbot([], elem_id="RADAR:Channel 94.1", bubble_full_width=False)
567
- # choice = gr.Radio(label="Select Style", choices=["Details", "Conversational"], value="Conversational")
568
- # tts_choice = gr.Radio(label="Select TTS Model", choices=["ElevenLabs", "Parler TTS"], value="Parler TTS")
569
-
570
- # gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
571
- # chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
572
- # chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
573
- # bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
574
- # bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
575
- # chatbot.like(print_like_dislike, None, None)
576
- # clear_button = gr.Button("Clear")
577
- # clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
578
-
579
-
580
- # audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
581
- # audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="SAMLOne_real_time")
582
-
583
- # # gr.Markdown("<h1 style='color: red;'>Map</h1>", elem_id="location-markdown")
584
- # # location_output = gr.HTML()
585
- # # bot_msg.then(show_map_if_details, [chatbot, choice], [location_output, location_output])
586
-
587
- # # with gr.Column():
588
- # # weather_output = gr.HTML(value=fetch_local_weather())
589
- # # news_output = gr.HTML(value=fetch_local_news())
590
- # # news_output = gr.HTML(value=fetch_local_events())
591
-
592
- # with gr.Column():
593
-
594
- # image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
595
- # image_output_2 = gr.Image(value=generate_image(hardcoded_prompt_2), width=400, height=400)
596
- # image_output_3 = gr.Image(value=generate_image(hardcoded_prompt_3), width=400, height=400)
597
-
598
-
599
- # refresh_button = gr.Button("Refresh Images")
600
- # refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3])
601
-
602
- # demo.queue()
603
- # demo.launch(share=True)
604
-
605
-
606
  import subprocess
607
  import sys
608
 
@@ -612,6 +7,7 @@ def install_parler_tts():
612
  # Call the function to install parler-tts
613
  install_parler_tts()
614
 
 
615
  import gradio as gr
616
  import requests
617
  import os
@@ -637,15 +33,6 @@ from langchain.chains import RetrievalQA
637
  from langchain.chains.conversation.memory import ConversationBufferWindowMemory
638
  from langchain.agents import Tool, initialize_agent
639
  from huggingface_hub import login
640
- from typing import Optional
641
-
642
- from pydub import AudioSegment
643
- import io
644
- import math
645
- from threading import Thread
646
- from queue import Queue
647
- from transformers.generation.streamers import BaseStreamer
648
- import numpy as np
649
 
650
  # Check if the token is already set in the environment variables
651
  hf_token = os.getenv("HF_TOKEN")
@@ -822,17 +209,17 @@ def get_weather_icon(condition):
822
  current_time_and_date = get_current_time_and_date()
823
 
824
  # Define prompt templates
825
- template1 = """You are an expert concierge who is helpful and a renowned guide for Birmingham, Alabama. Based on weather being a sunny bright day and today's date being 1st July 2024, use the following pieces of context,
826
- memory, and message history, along with your knowledge of perennial events in Birmingham, Alabama, to answer the question at the end. If you don't know the answer, just say "Homie, I need to get more data for this," and don't try to make up an answer.
827
  Use fifteen sentences maximum. Keep the answer as detailed as possible. Always include the address, time, date, and
828
  event type and description. Always say "It was my pleasure!" at the end of the answer.
829
  {context}
830
  Question: {question}
831
  Helpful Answer:"""
832
 
833
- template2 = """You are an expert concierge who is helpful and a renowned guide for Birmingham, Alabama. Based on today's weather being a sunny bright day and today's date being 1st July 2024, take the location or address but don't show the location or address on the output prompts. Use the following pieces of context,
834
- memory, and message history, along with your knowledge of perennial events in Birmingham, Alabama, to answer the question at the end. If you don't know the answer, just say "Homie, I need to get more data for this," and don't try to make up an answer.
835
- Keep the answer short, sweet, and crisp. Always say "It was my pleasure!" at the end of the answer.
836
  {context}
837
  Question: {question}
838
  Helpful Answer:"""
@@ -892,7 +279,7 @@ def bot(history, choice, tts_model):
892
  if not history:
893
  return history
894
  response, addresses = generate_answer(history[-1][0], choice)
895
- history[-1][1] = response
896
 
897
  # Generate audio for the entire response in a separate thread
898
  with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -909,6 +296,7 @@ def bot(history, choice, tts_model):
909
  audio_path = audio_future.result()
910
  yield history, audio_path
911
 
 
912
  def add_message(history, message):
913
  history.append((message, None))
914
  return history, gr.Textbox(value="", interactive=True, placeholder="Enter message or upload file...", show_label=False)
@@ -1119,175 +507,31 @@ def generate_audio_elevenlabs(text):
1119
  logging.error(f"Error generating audio: {response.text}")
1120
  return None
1121
 
1122
- # Changes start here
1123
- class ParlerTTSStreamer(BaseStreamer):
1124
- def __init__(
1125
- self,
1126
- model: ParlerTTSForConditionalGeneration,
1127
- device: Optional[str] = None,
1128
- play_steps: Optional[int] = 10,
1129
- stride: Optional[int] = None,
1130
- timeout: Optional[float] = None,
1131
- ):
1132
- self.decoder = model.decoder
1133
- self.audio_encoder = model.audio_encoder
1134
- self.generation_config = model.generation_config
1135
- self.device = device if device is not None else model.device
1136
-
1137
- self.play_steps = play_steps
1138
- if stride is not None:
1139
- self.stride = stride
1140
- else:
1141
- hop_length = math.floor(self.audio_encoder.config.sampling_rate / self.audio_encoder.config.frame_rate)
1142
- self.stride = hop_length * (play_steps - self.decoder.num_codebooks) // 6
1143
- self.token_cache = None
1144
- self.to_yield = 0
1145
-
1146
- self.audio_queue = Queue()
1147
- self.stop_signal = None
1148
- self.timeout = timeout
1149
-
1150
- def apply_delay_pattern_mask(self, input_ids):
1151
- _, delay_pattern_mask = self.decoder.build_delay_pattern_mask(
1152
- input_ids[:, :1],
1153
- bos_token_id=self.generation_config.bos_token_id,
1154
- pad_token_id=self.generation_config.decoder_start_token_id,
1155
- max_length=input_ids.shape[-1],
1156
- )
1157
- input_ids = self.decoder.apply_delay_pattern_mask(input_ids, delay_pattern_mask)
1158
-
1159
- mask = (delay_pattern_mask != self.generation_config.bos_token_id) & (delay_pattern_mask != self.generation_config.pad_token_id)
1160
- input_ids = input_ids[mask].reshape(1, self.decoder.num_codebooks, -1)
1161
- input_ids = input_ids[None, ...]
1162
-
1163
- input_ids = input_ids.to(self.audio_encoder.device)
1164
-
1165
- decode_sequentially = (
1166
- self.generation_config.bos_token_id in input_ids
1167
- or self.generation_config.pad_token_id in input_ids
1168
- or self.generation_config.eos_token_id in input_ids
1169
- )
1170
- if not decode_sequentially:
1171
- output_values = self.audio_encoder.decode(
1172
- input_ids,
1173
- audio_scales=[None],
1174
- )
1175
- else:
1176
- sample = input_ids[:, 0]
1177
- sample_mask = (sample >= self.audio_encoder.config.codebook_size).sum(dim=(0, 1)) == 0
1178
- sample = sample[:, :, sample_mask]
1179
- output_values = self.audio_encoder.decode(sample[None, ...], [None])
1180
-
1181
- audio_values = output_values.audio_values[0, 0]
1182
- return audio_values.cpu().float().numpy()
1183
-
1184
- def put(self, value):
1185
- batch_size = value.shape[0] // self.decoder.num_codebooks
1186
- if batch_size > 1:
1187
- raise ValueError("ParlerTTSStreamer only supports batch size 1")
1188
-
1189
- if self.token_cache is None:
1190
- self.token_cache = value
1191
- else:
1192
- self.token_cache = torch.concatenate([self.token_cache, value[:, None]], dim=-1)
1193
-
1194
- if self.token_cache.shape[-1] % self.play_steps == 0:
1195
- audio_values = self.apply_delay_pattern_mask(self.token_cache)
1196
- self.on_finalized_audio(audio_values[self.to_yield : -self.stride])
1197
- self.to_yield += len(audio_values) - self.to_yield - self.stride
1198
-
1199
- def end(self):
1200
- if self.token_cache is not None:
1201
- audio_values = self.apply_delay_pattern_mask(self.token_cache)
1202
- else:
1203
- audio_values = np.zeros(self.to_yield)
1204
-
1205
- self.on_finalized_audio(audio_values[self.to_yield :], stream_end=True)
1206
-
1207
- def on_finalized_audio(self, audio: np.ndarray, stream_end: bool = False):
1208
- self.audio_queue.put(audio, timeout=self.timeout)
1209
- if stream_end:
1210
- self.audio_queue.put(self.stop_signal, timeout=self.timeout)
1211
-
1212
- def __iter__(self):
1213
- return self
1214
-
1215
- def __next__(self):
1216
- value = self.audio_queue.get(timeout=self.timeout)
1217
- if not isinstance(value, np.ndarray) and value == self.stop_signal:
1218
- raise StopIteration()
1219
- else:
1220
- return value
1221
-
1222
- def numpy_to_mp3(audio_array, sampling_rate):
1223
- if np.issubdtype(audio_array.dtype, np.floating):
1224
- max_val = np.max(np.abs(audio_array))
1225
- audio_array = (audio_array / max_val) * 32767
1226
- audio_array = audio_array.astype(np.int16)
1227
-
1228
- audio_segment = AudioSegment(
1229
- audio_array.tobytes(),
1230
- frame_rate=sampling_rate,
1231
- sample_width=audio_array.dtype.itemsize,
1232
- channels=1
1233
- )
1234
-
1235
- mp3_io = io.BytesIO()
1236
- audio_segment.export(mp3_io, format="mp3", bitrate="320k")
1237
-
1238
- mp3_bytes = mp3_io.getvalue()
1239
- mp3_io.close()
1240
-
1241
- return mp3_bytes
1242
-
1243
  def generate_audio_parler_tts(text):
1244
  model_id = 'parler-tts/parler_tts_mini_v0.1'
1245
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
1246
-
1247
  try:
1248
  model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
1249
  except torch.cuda.OutOfMemoryError:
1250
  print("CUDA out of memory. Switching to CPU.")
1251
  device = "cpu"
1252
  model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
1253
-
1254
  tokenizer = AutoTokenizer.from_pretrained(model_id)
1255
- sampling_rate = model.audio_encoder.config.sampling_rate
1256
- frame_rate = model.audio_encoder.config.frame_rate
1257
 
1258
  description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
1259
- play_steps_in_s = 2.0
1260
- play_steps = int(frame_rate * play_steps_in_s)
1261
-
1262
- streamer = ParlerTTSStreamer(model, device=device, play_steps=play_steps)
1263
-
1264
- inputs = tokenizer(description, return_tensors="pt").to(device)
1265
- prompt = tokenizer(text, return_tensors="pt").to(device)
1266
 
1267
- generation_kwargs = dict(
1268
- input_ids=inputs.input_ids,
1269
- prompt_input_ids=prompt.input_ids,
1270
- streamer=streamer,
1271
- do_sample=True,
1272
- temperature=1.0,
1273
- min_new_tokens=10,
1274
- )
1275
-
1276
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
1277
- thread.start()
1278
-
1279
- combined_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
1280
- combined_audio = []
1281
-
1282
- for new_audio in streamer:
1283
- print(f"Sample of length: {round(new_audio.shape[0] / sampling_rate, 2)} seconds")
1284
- combined_audio.extend(new_audio)
1285
 
1286
- sf.write(combined_audio_path, combined_audio, sampling_rate)
1287
 
1288
- logging.debug(f"Combined audio saved to {combined_audio_path}")
1289
- return combined_audio_path
1290
- # Changes end here
1291
 
1292
  # Stable Diffusion setup
1293
  pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
@@ -1358,3 +602,6 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
1358
  demo.queue()
1359
  demo.launch(share=True)
1360
 
1
  import subprocess
2
  import sys
3
 
 
7
  # Call the function to install parler-tts
8
  install_parler_tts()
9
 
10
+
11
  import gradio as gr
12
  import requests
13
  import os
 
33
  from langchain.chains.conversation.memory import ConversationBufferWindowMemory
34
  from langchain.agents import Tool, initialize_agent
35
  from huggingface_hub import login
 
36
 
37
  # Check if the token is already set in the environment variables
38
  hf_token = os.getenv("HF_TOKEN")
 
209
  current_time_and_date = get_current_time_and_date()
210
 
211
  # Define prompt templates
212
+ template1 = """You are an expert concierge who is helpful and a renowned guide for Birmingham,Alabama. Based on weather being a sunny bright day and the today's date is 1st july 2024, use the following pieces of context,
213
+ memory, and message history, along with your knowledge of perennial events in Birmingham,Alabama, to answer the question at the end. If you don't know the answer, just say "Homie, I need to get more data for this," and don't try to make up an answer.
214
  Use fifteen sentences maximum. Keep the answer as detailed as possible. Always include the address, time, date, and
215
  event type and description. Always say "It was my pleasure!" at the end of the answer.
216
  {context}
217
  Question: {question}
218
  Helpful Answer:"""
219
 
220
+ template2 = """You are an expert concierge who is helpful and a renowned guide for Birmingham,Alabama. Based on today's weather being a sunny bright day and today's date is 1st july 2024, take the location or address but don't show the location or address on the output prompts. Use the following pieces of context,
221
+ memory, and message history, along with your knowledge of perennial events in Birmingham,Alabama, to answer the question at the end. If you don't know the answer, just say "Homie, I need to get more data for this," and don't try to make up an answer.
222
+ Keep the answer short and sweet and crisp. Always say "It was my pleasure!" at the end of the answer.
223
  {context}
224
  Question: {question}
225
  Helpful Answer:"""
 
279
  if not history:
280
  return history
281
  response, addresses = generate_answer(history[-1][0], choice)
282
+ history[-1][1] = ""
283
 
284
  # Generate audio for the entire response in a separate thread
285
  with concurrent.futures.ThreadPoolExecutor() as executor:
 
296
  audio_path = audio_future.result()
297
  yield history, audio_path
298
 
299
+
300
  def add_message(history, message):
301
  history.append((message, None))
302
  return history, gr.Textbox(value="", interactive=True, placeholder="Enter message or upload file...", show_label=False)
 
507
  logging.error(f"Error generating audio: {response.text}")
508
  return None
509
 
510
  def generate_audio_parler_tts(text):
511
  model_id = 'parler-tts/parler_tts_mini_v0.1'
512
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
513
  try:
514
  model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
515
  except torch.cuda.OutOfMemoryError:
516
  print("CUDA out of memory. Switching to CPU.")
517
  device = "cpu"
518
  model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
 
519
  tokenizer = AutoTokenizer.from_pretrained(model_id)
520
 
521
  description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
522
 
523
+ input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
524
+ prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
525
 
526
+ generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
527
+ audio_arr = generation.cpu().numpy().squeeze()
528
+
529
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
530
+ sf.write(f.name, audio_arr, model.config.sampling_rate)
531
+ temp_audio_path = f.name
532
 
533
+ logging.debug(f"Audio saved to {temp_audio_path}")
534
+ return temp_audio_path
535
 
536
  # Stable Diffusion setup
537
  pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
 
602
  demo.queue()
603
  demo.launch(share=True)
604
 
605
+
606
+
607
+