Pijush2023 committed on
Commit 90468b8 · verified · 1 Parent(s): 37755ba

Update app.py

Files changed (1)
  1. app.py +622 -11
app.py CHANGED
@@ -1,13 +1,611 @@
1
- import subprocess
2
- import sys
3
-
4
- def install_parler_tts():
5
-     subprocess.check_call([sys.executable, "-m", "pip", "install", "git+https://github.com/huggingface/parler-tts.git"])
6
-
7
- # Call the function to install parler-tts
8
- install_parler_tts()
9
-
10
-
11
  import gradio as gr
12
  import requests
13
  import os
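The pip install of parler-tts removed from the top of the file reappears further down in this commit (new lines 634-639), after the core imports. For reference, a minimal sketch of that runtime-install pattern with a hypothetical import guard (the guard is an illustration, not part of the commit), so the package is only installed when it is missing:

import subprocess
import sys

def ensure_parler_tts():
    # Hypothetical guard: skip the install if parler_tts is already importable.
    try:
        import parler_tts  # noqa: F401
    except ImportError:
        subprocess.check_call([
            sys.executable, "-m", "pip", "install",
            "git+https://github.com/huggingface/parler-tts.git",
        ])

ensure_parler_tts()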
@@ -25,7 +623,6 @@ from googlemaps import Client as GoogleMapsClient
25
  from gtts import gTTS
26
  from diffusers import StableDiffusion3Pipeline
27
  import soundfile as sf
28
-
29
  from langchain_openai import OpenAIEmbeddings, ChatOpenAI
30
  from langchain_pinecone import PineconeVectorStore
31
  from langchain.prompts import PromptTemplate
@@ -34,6 +631,12 @@ from langchain.chains.conversation.memory import ConversationBufferWindowMemory
34
  from langchain.agents import Tool, initialize_agent
35
  from huggingface_hub import login
36
 
37
  # Check if the token is already set in the environment variables
38
  hf_token = os.getenv("HF_TOKEN")
39
 
@@ -296,7 +899,6 @@ def bot(history, choice, tts_model):
296
  audio_path = audio_future.result()
297
  yield history, audio_path
298
 
299
-
300
  def add_message(history, message):
301
  history.append((message, None))
302
  return history, gr.Textbox(value="", interactive=True, placeholder="Enter message or upload file...", show_label=False)
@@ -523,11 +1125,20 @@ def generate_audio_parler_tts(text):
523
  input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
524
  prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
525
 
526
- generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
527
- audio_arr = generation.cpu().numpy().squeeze()
528
 
529
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
530
- sf.write(f.name, audio_arr, model.config.sampling_rate)
531
  temp_audio_path = f.name
532
 
533
  logging.debug(f"Audio saved to {temp_audio_path}")
 
1
+ # import subprocess
2
+ # import sys
3
+
4
+ # def install_parler_tts():
5
+ # subprocess.check_call([sys.executable, "-m", "pip", "install", "git+https://github.com/huggingface/parler-tts.git"])
6
+
7
+ # # Call the function to install parler-tts
8
+ # install_parler_tts()
9
+
10
+
11
+ # import gradio as gr
12
+ # import requests
13
+ # import os
14
+ # import time
15
+ # import re
16
+ # import logging
17
+ # import tempfile
18
+ # import folium
19
+ # import concurrent.futures
20
+ # import torch
21
+ # from PIL import Image
22
+ # from datetime import datetime
23
+ # from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
24
+ # from googlemaps import Client as GoogleMapsClient
25
+ # from gtts import gTTS
26
+ # from diffusers import StableDiffusion3Pipeline
27
+ # import soundfile as sf
28
+
29
+ # from langchain_openai import OpenAIEmbeddings, ChatOpenAI
30
+ # from langchain_pinecone import PineconeVectorStore
31
+ # from langchain.prompts import PromptTemplate
32
+ # from langchain.chains import RetrievalQA
33
+ # from langchain.chains.conversation.memory import ConversationBufferWindowMemory
34
+ # from langchain.agents import Tool, initialize_agent
35
+ # from huggingface_hub import login
36
+
37
+ # # Check if the token is already set in the environment variables
38
+ # hf_token = os.getenv("HF_TOKEN")
39
+
40
+ # if hf_token is None:
41
+ # # If the token is not set, prompt for it (this should be done securely)
42
+ # print("Please set your Hugging Face token in the environment variables.")
43
+ # else:
44
+ # # Login using the token
45
+ # login(token=hf_token)
46
+
47
+ # # Your application logic goes here
48
+ # print("Logged in successfully to Hugging Face Hub!")
49
+
50
+ # # Set up logging
51
+ # logging.basicConfig(level=logging.DEBUG)
52
+
53
+ # # Initialize OpenAI embeddings
54
+ # embeddings = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])
55
+
56
+ # # Initialize Pinecone
57
+ # from pinecone import Pinecone
58
+ # pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
59
+
60
+ # index_name = "birmingham-dataset"
61
+ # vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
62
+ # retriever = vectorstore.as_retriever(search_kwargs={'k': 5})
63
+
64
+ # # Initialize ChatOpenAI model
65
+ # chat_model = ChatOpenAI(api_key=os.environ['OPENAI_API_KEY'],
66
+ # temperature=0, model='gpt-4o')
67
+
68
+ # conversational_memory = ConversationBufferWindowMemory(
69
+ # memory_key='chat_history',
70
+ # k=10,
71
+ # return_messages=True
72
+ # )
73
+
74
+ # def get_current_time_and_date():
75
+ # now = datetime.now()
76
+ # return now.strftime("%Y-%m-%d %H:%M:%S")
77
+
78
+ # # Example usage
79
+ # current_time_and_date = get_current_time_and_date()
80
+
81
+ # def fetch_local_events():
82
+ # api_key = os.environ['SERP_API']
83
+ # url = f'https://serpapi.com/search.json?engine=google_events&q=Events+in+Birmingham&hl=en&gl=us&api_key={api_key}'
84
+
85
+ # response = requests.get(url)
86
+ # if response.status_code == 200:
87
+ # events_results = response.json().get("events_results", [])
88
+ # events_html = """
89
+ # <h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Local Events</h2>
90
+ # <style>
91
+ # .event-item {
92
+ # font-family: 'Verdana', sans-serif;
93
+ # color: #333;
94
+ # margin-bottom: 15px;
95
+ # padding: 10px;
96
+ # font-weight: bold;
97
+ # }
98
+ # .event-item a {
99
+ # color: #1E90FF;
100
+ # text-decoration: none;
101
+ # }
102
+ # .event-item a:hover {
103
+ # text-decoration: underline;
104
+ # }
105
+ # </style>
106
+ # """
107
+ # for index, event in enumerate(events_results):
108
+ # title = event.get("title", "No title")
109
+ # date = event.get("date", "No date")
110
+ # location = event.get("address", "No location")
111
+ # link = event.get("link", "#")
112
+ # events_html += f"""
113
+ # <div class="event-item">
114
+ # <a href='{link}' target='_blank'>{index + 1}. {title}</a>
115
+ # <p>Date: {date}<br>Location: {location}</p>
116
+ # </div>
117
+ # """
118
+ # return events_html
119
+ # else:
120
+ # return "<p>Failed to fetch local events</p>"
121
+
122
+ # def fetch_local_weather():
123
+ # try:
124
+ # api_key = os.environ['WEATHER_API']
125
+ # url = f'https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/birmingham?unitGroup=metric&include=events%2Calerts%2Chours%2Cdays%2Ccurrent&key={api_key}'
126
+ # response = requests.get(url)
127
+ # response.raise_for_status()
128
+ # jsonData = response.json()
129
+
130
+ # current_conditions = jsonData.get("currentConditions", {})
131
+ # temp_celsius = current_conditions.get("temp", "N/A")
132
+
133
+ # if temp_celsius != "N/A":
134
+ # temp_fahrenheit = int((temp_celsius * 9/5) + 32)
135
+ # else:
136
+ # temp_fahrenheit = "N/A"
137
+
138
+ # condition = current_conditions.get("conditions", "N/A")
139
+ # humidity = current_conditions.get("humidity", "N/A")
140
+
141
+ # weather_html = f"""
142
+ # <div class="weather-theme">
143
+ # <h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Local Weather</h2>
144
+ # <div class="weather-content">
145
+ # <div class="weather-icon">
146
+ # <img src="https://www.weatherbit.io/static/img/icons/{get_weather_icon(condition)}.png" alt="{condition}" style="width: 100px; height: 100px;">
147
+ # </div>
148
+ # <div class="weather-details">
149
+ # <p style="font-family: 'Verdana', sans-serif; color: #333; font-size: 1.2em;">Temperature: {temp_fahrenheit}°F</p>
150
+ # <p style="font-family: 'Verdana', sans-serif; color: #333; font-size: 1.2em;">Condition: {condition}</p>
151
+ # <p style="font-family: 'Verdana', sans-serif; color: #333; font-size: 1.2em;">Humidity: {humidity}%</p>
152
+ # </div>
153
+ # </div>
154
+ # </div>
155
+ # <style>
156
+ # .weather-theme {{
157
+ # animation: backgroundAnimation 10s infinite alternate;
158
+ # border-radius: 10px;
159
+ # padding: 10px;
160
+ # margin-bottom: 15px;
161
+ # background: linear-gradient(45deg, #ffcc33, #ff6666, #ffcc33, #ff6666);
162
+ # background-size: 400% 400%;
163
+ # box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
164
+ # transition: box-shadow 0.3s ease, background-color 0.3s ease;
165
+ # }}
166
+ # .weather-theme:hover {{
167
+ # box-shadow: 0 8px 16px rgba(0, 0, 0, 0.2);
168
+ # background-position: 100% 100%;
169
+ # }}
170
+ # @keyframes backgroundAnimation {{
171
+ # 0% {{ background-position: 0% 50%; }}
172
+ # 100% {{ background-position: 100% 50%; }}
173
+ # }}
174
+ # .weather-content {{
175
+ # display: flex;
176
+ # align-items: center;
177
+ # }}
178
+ # .weather-icon {{
179
+ # flex: 1;
180
+ # }}
181
+ # .weather-details {{
182
+ # flex: 3;
183
+ # }}
184
+ # </style>
185
+ # """
186
+ # return weather_html
187
+ # except requests.exceptions.RequestException as e:
188
+ # return f"<p>Failed to fetch local weather: {e}</p>"
189
+
190
+ # def get_weather_icon(condition):
191
+ # condition_map = {
192
+ # "Clear": "c01d",
193
+ # "Partly Cloudy": "c02d",
194
+ # "Cloudy": "c03d",
195
+ # "Overcast": "c04d",
196
+ # "Mist": "a01d",
197
+ # "Patchy rain possible": "r01d",
198
+ # "Light rain": "r02d",
199
+ # "Moderate rain": "r03d",
200
+ # "Heavy rain": "r04d",
201
+ # "Snow": "s01d",
202
+ # "Thunderstorm": "t01d",
203
+ # "Fog": "a05d",
204
+ # }
205
+ # return condition_map.get(condition, "c04d")
206
+
207
+ # # Update prompt templates to include fetched details
208
+
209
+ # current_time_and_date = get_current_time_and_date()
210
+
211
+ # # Define prompt templates
212
+ # template1 = """You are an expert concierge who is helpful and a renowned guide for Birmingham,Alabama. Based on weather being a sunny bright day and the today's date is 1st july 2024, use the following pieces of context,
213
+ # memory, and message history, along with your knowledge of perennial events in Birmingham,Alabama, to answer the question at the end. If you don't know the answer, just say "Homie, I need to get more data for this," and don't try to make up an answer.
214
+ # Use fifteen sentences maximum. Keep the answer as detailed as possible. Always include the address, time, date, and
215
+ # event type and description. Always say "It was my pleasure!" at the end of the answer.
216
+ # {context}
217
+ # Question: {question}
218
+ # Helpful Answer:"""
219
+
220
+ # template2 = """You are an expert concierge who is helpful and a renowned guide for Birmingham,Alabama. Based on today's weather being a sunny bright day and today's date is 1st july 2024, take the location or address but don't show the location or address on the output prompts. Use the following pieces of context,
221
+ # memory, and message history, along with your knowledge of perennial events in Birmingham,Alabama, to answer the question at the end. If you don't know the answer, just say "Homie, I need to get more data for this," and don't try to make up an answer.
222
+ # Keep the answer short and sweet and crisp. Always say "It was my pleasure!" at the end of the answer.
223
+ # {context}
224
+ # Question: {question}
225
+ # Helpful Answer:"""
226
+
227
+ # QA_CHAIN_PROMPT_1 = PromptTemplate(input_variables=["context", "question"], template=template1)
228
+ # QA_CHAIN_PROMPT_2 = PromptTemplate(input_variables=["context", "question"], template=template2)
229
+
230
+ # # Define the retrieval QA chain
231
+ # def build_qa_chain(prompt_template):
232
+ # qa_chain = RetrievalQA.from_chain_type(
233
+ # llm=chat_model,
234
+ # chain_type="stuff",
235
+ # retriever=retriever,
236
+ # chain_type_kwargs={"prompt": prompt_template}
237
+ # )
238
+ # tools = [
239
+ # Tool(
240
+ # name='Knowledge Base',
241
+ # func=qa_chain,
242
+ # description='Use this tool when answering general knowledge queries to get more information about the topic'
243
+ # )
244
+ # ]
245
+ # return qa_chain, tools
246
+
247
+ # # Define the agent initializer
248
+ # def initialize_agent_with_prompt(prompt_template):
249
+ # qa_chain, tools = build_qa_chain(prompt_template)
250
+ # agent = initialize_agent(
251
+ # agent='chat-conversational-react-description',
252
+ # tools=tools,
253
+ # llm=chat_model,
254
+ # verbose=False,
255
+ # max_iteration=5,
256
+ # early_stopping_method='generate',
257
+ # memory=conversational_memory
258
+ # )
259
+ # return agent
260
+
261
+ # # Define the function to generate answers
262
+ # def generate_answer(message, choice):
263
+ # logging.debug(f"generate_answer called with prompt_choice: {choice}")
264
+
265
+ # if choice == "Details":
266
+ # agent = initialize_agent_with_prompt(QA_CHAIN_PROMPT_1)
267
+ # elif choice == "Conversational":
268
+ # agent = initialize_agent_with_prompt(QA_CHAIN_PROMPT_2)
269
+ # else:
270
+ # logging.error(f"Invalid prompt_choice: {choice}. Defaulting to 'Conversational'")
271
+ # agent = initialize_agent_with_prompt(QA_CHAIN_PROMPT_2)
272
+ # response = agent(message)
273
+
274
+ # # Extract addresses for mapping regardless of the choice
275
+ # addresses = extract_addresses(response['output'])
276
+ # return response['output'], addresses
277
+
278
+ # def bot(history, choice, tts_model):
279
+ # if not history:
280
+ # return history
281
+ # response, addresses = generate_answer(history[-1][0], choice)
282
+ # history[-1][1] = ""
283
+
284
+ # # Generate audio for the entire response in a separate thread
285
+ # with concurrent.futures.ThreadPoolExecutor() as executor:
286
+ # if tts_model == "ElevenLabs":
287
+ # audio_future = executor.submit(generate_audio_elevenlabs, response)
288
+ # else:
289
+ # audio_future = executor.submit(generate_audio_parler_tts, response)
290
+
291
+ # for character in response:
292
+ # history[-1][1] += character
293
+ # time.sleep(0.05) # Adjust the speed of text appearance
294
+ # yield history, None
295
+
296
+ # audio_path = audio_future.result()
297
+ # yield history, audio_path
298
+
299
+
300
+ # def add_message(history, message):
301
+ # history.append((message, None))
302
+ # return history, gr.Textbox(value="", interactive=True, placeholder="Enter message or upload file...", show_label=False)
303
+
304
+ # def print_like_dislike(x: gr.LikeData):
305
+ # print(x.index, x.value, x.liked)
306
+
307
+ # def extract_addresses(response):
308
+ # if not isinstance(response, str):
309
+ # response = str(response)
310
+ # address_patterns = [
311
+ # r'([A-Z].*,\sBirmingham,\sAL\s\d{5})',
312
+ # r'(\d{4}\s.*,\sBirmingham,\sAL\s\d{5})',
313
+ # r'([A-Z].*,\sAL\s\d{5})',
314
+ # r'([A-Z].*,.*\sSt,\sBirmingham,\sAL\s\d{5})',
315
+ # r'([A-Z].*,.*\sStreets,\sBirmingham,\sAL\s\d{5})',
316
+ # r'(\d{2}.*\sStreets)',
317
+ # r'([A-Z].*\s\d{2},\sBirmingham,\sAL\s\d{5})'
318
+ # r'([a-zA-Z]\s Birmingham)'
319
+ # ]
320
+ # addresses = []
321
+ # for pattern in address_patterns:
322
+ # addresses.extend(re.findall(pattern, response))
323
+ # return addresses
324
+
325
+ # all_addresses = []
326
+
327
+ # def generate_map(location_names):
328
+ # global all_addresses
329
+ # all_addresses.extend(location_names)
330
+
331
+ # api_key = os.environ['GOOGLEMAPS_API_KEY']
332
+ # gmaps = GoogleMapsClient(key=api_key)
333
+
334
+ # m = folium.Map(location=[33.5175,-86.809444], zoom_start=16)
335
+
336
+ # for location_name in all_addresses:
337
+ # geocode_result = gmaps.geocode(location_name)
338
+ # if geocode_result:
339
+ # location = geocode_result[0]['geometry']['location']
340
+ # folium.Marker(
341
+ # [location['lat'], location['lng']],
342
+ # tooltip=f"{geocode_result[0]['formatted_address']}"
343
+ # ).add_to(m)
344
+
345
+ # map_html = m._repr_html_()
346
+ # return map_html
347
+
348
+ # def fetch_local_news():
349
+ # api_key = os.environ['SERP_API']
350
+ # url = f'https://serpapi.com/search.json?engine=google_news&q=birmingham headline&api_key={api_key}'
351
+ # response = requests.get(url)
352
+ # if response.status_code == 200:
353
+ # results = response.json().get("news_results", [])
354
+ # news_html = """
355
+ # <h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Birmingham Today</h2>
356
+ # <style>
357
+ # .news-item {
358
+ # font-family: 'Verdana', sans-serif;
359
+ # color: #333;
360
+ # background-color: #f0f8ff;
361
+ # margin-bottom: 15px;
362
+ # padding: 10px;
363
+ # border-radius: 5px;
364
+ # transition: box-shadow 0.3s ease, background-color 0.3s ease;
365
+ # font-weight: bold;
366
+ # }
367
+ # .news-item:hover {
368
+ # box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
369
+ # background-color: #e6f7ff;
370
+ # }
371
+ # .news-item a {
372
+ # color: #1E90FF;
373
+ # text-decoration: none;
374
+ # font-weight: bold;
375
+ # }
376
+ # .news-item a:hover {
377
+ # text-decoration: underline;
378
+ # }
379
+ # .news-preview {
380
+ # position: absolute;
381
+ # display: none;
382
+ # border: 1px solid #ccc;
383
+ # border-radius: 5px;
384
+ # box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
385
+ # background-color: white;
386
+ # z-index: 1000;
387
+ # max-width: 300px;
388
+ # padding: 10px;
389
+ # font-family: 'Verdana', sans-serif;
390
+ # color: #333;
391
+ # }
392
+ # </style>
393
+ # <script>
394
+ # function showPreview(event, previewContent) {
395
+ # var previewBox = document.getElementById('news-preview');
396
+ # previewBox.innerHTML = previewContent;
397
+ # previewBox.style.left = event.pageX + 'px';
398
+ # previewBox.style.top = event.pageY + 'px';
399
+ # previewBox.style.display = 'block';
400
+ # }
401
+ # function hidePreview() {
402
+ # var previewBox = document.getElementById('news-preview');
403
+ # previewBox.style.display = 'none';
404
+ # }
405
+ # </script>
406
+ # <div id="news-preview" class="news-preview"></div>
407
+ # """
408
+ # for index, result in enumerate(results[:7]):
409
+ # title = result.get("title", "No title")
410
+ # link = result.get("link", "#")
411
+ # snippet = result.get("snippet", "")
412
+ # news_html += f"""
413
+ # <div class="news-item" onmouseover="showPreview(event, '{snippet}')" onmouseout="hidePreview()">
414
+ # <a href='{link}' target='_blank'>{index + 1}. {title}</a>
415
+ # <p>{snippet}</p>
416
+ # </div>
417
+ # """
418
+ # return news_html
419
+ # else:
420
+ # return "<p>Failed to fetch local news</p>"
421
+
422
+ # # Voice Control
423
+ # import numpy as np
424
+ # import torch
425
+ # from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
426
+ # from parler_tts import ParlerTTSForConditionalGeneration
427
+ # from transformers import AutoTokenizer
428
+
429
+ # model_id = 'openai/whisper-large-v3'
430
+ # device = "cuda:0" if torch.cuda.is_available() else "cpu"
431
+ # torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
432
+ # model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype,
433
+ # #low_cpu_mem_usage=True,
434
+ # use_safetensors=True).to(device)
435
+ # processor = AutoProcessor.from_pretrained(model_id)
436
+
437
+ # # Optimized ASR pipeline
438
+ # pipe_asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, max_new_tokens=128, chunk_length_s=15, batch_size=16, torch_dtype=torch_dtype, device=device, return_timestamps=True)
439
+
440
+ # base_audio_drive = "/data/audio"
441
+
442
+ # import numpy as np
443
+
444
+ # def transcribe_function(stream, new_chunk):
445
+ # try:
446
+ # sr, y = new_chunk[0], new_chunk[1]
447
+ # except TypeError:
448
+ # print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
449
+ # return stream, "", None
450
+
451
+ # y = y.astype(np.float32) / np.max(np.abs(y))
452
+
453
+ # if stream is not None:
454
+ # stream = np.concatenate([stream, y])
455
+ # else:
456
+ # stream = y
457
+
458
+ # result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
459
+
460
+ # full_text = result.get("text", "")
461
+
462
+ # return stream, full_text, result
463
+
464
+ # def update_map_with_response(history):
465
+ # if not history:
466
+ # return ""
467
+ # response = history[-1][1]
468
+ # addresses = extract_addresses(response)
469
+ # return generate_map(addresses)
470
+
471
+ # def clear_textbox():
472
+ # return ""
473
+
474
+ # def show_map_if_details(history,choice):
475
+ # if choice in ["Details", "Conversational"]:
476
+ # return gr.update(visible=True), update_map_with_response(history)
477
+ # else:
478
+ # return gr.update(visible=False), ""
479
+
480
+ # def generate_audio_elevenlabs(text):
481
+ # XI_API_KEY = os.environ['ELEVENLABS_API']
482
+ # VOICE_ID = 'd9MIrwLnvDeH7aZb61E9' # Replace with your voice ID
483
+ # tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
484
+ # headers = {
485
+ # "Accept": "application/json",
486
+ # "xi-api-key": XI_API_KEY
487
+ # }
488
+ # data = {
489
+ # "text": str(text),
490
+ # "model_id": "eleven_multilingual_v2",
491
+ # "voice_settings": {
492
+ # "stability": 1.0,
493
+ # "similarity_boost": 0.0,
494
+ # "style": 0.60, # Adjust style for more romantic tone
495
+ # "use_speaker_boost": False
496
+ # }
497
+ # }
498
+ # response = requests.post(tts_url, headers=headers, json=data, stream=True)
499
+ # if response.ok:
500
+ # with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
501
+ # for chunk in response.iter_content(chunk_size=1024):
502
+ # f.write(chunk)
503
+ # temp_audio_path = f.name
504
+ # logging.debug(f"Audio saved to {temp_audio_path}")
505
+ # return temp_audio_path
506
+ # else:
507
+ # logging.error(f"Error generating audio: {response.text}")
508
+ # return None
509
+
510
+ # def generate_audio_parler_tts(text):
511
+ # model_id = 'parler-tts/parler_tts_mini_v0.1'
512
+ # device = "cuda:0" if torch.cuda.is_available() else "cpu"
513
+ # try:
514
+ # model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
515
+ # except torch.cuda.OutOfMemoryError:
516
+ # print("CUDA out of memory. Switching to CPU.")
517
+ # device = "cpu"
518
+ # model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
519
+ # tokenizer = AutoTokenizer.from_pretrained(model_id)
520
+
521
+ # description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
522
+
523
+ # input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
524
+ # prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
525
+
526
+ # generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
527
+ # audio_arr = generation.cpu().numpy().squeeze()
528
+
529
+ # with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
530
+ # sf.write(f.name, audio_arr, model.config.sampling_rate)
531
+ # temp_audio_path = f.name
532
+
533
+ # logging.debug(f"Audio saved to {temp_audio_path}")
534
+ # return temp_audio_path
535
+
536
+ # # Stable Diffusion setup
537
+ # pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
538
+ # pipe = pipe.to("cuda")
539
+
540
+ # def generate_image(prompt):
541
+ # image = pipe(
542
+ # prompt,
543
+ # negative_prompt="",
544
+ # num_inference_steps=28,
545
+ # guidance_scale=3.0,
546
+ # ).images[0]
547
+ # return image
548
+
549
+ # # Hardcoded prompt for image generation
550
+ # hardcoded_prompt_1="Give a high quality photograph of a great looking red 2026 Bentley coupe against a skyline setting in th night, michael mann style in omaha enticing the consumer to buy this product"
551
+ # hardcoded_prompt_2="A vibrant and dynamic football game scene in the style of Peter Paul Rubens, showcasing the intense match between Alabama and Nebraska. The players are depicted with the dramatic, muscular physiques and expressive faces typical of Rubens' style. The Alabama team is wearing their iconic crimson and white uniforms, while the Nebraska team is in their classic red and white attire. The scene is filled with action, with players in mid-motion, tackling, running, and catching the ball. The background features a grand stadium filled with cheering fans, banners, and the natural landscape in the distance. The colors are rich and vibrant, with a strong use of light and shadow to create depth and drama. The overall atmosphere captures the intensity and excitement of the game, infused with the grandeur and dynamism characteristic of Rubens' work."
552
+ # hardcoded_prompt_3 = "Create a high-energy scene of a DJ performing on a large stage with vibrant lights, colorful lasers, a lively dancing crowd, and various electronic equipment in the background."
553
+
554
+ # def update_images():
555
+ # image_1 = generate_image(hardcoded_prompt_1)
556
+ # image_2 = generate_image(hardcoded_prompt_2)
557
+ # image_3 = generate_image(hardcoded_prompt_3)
558
+ # return image_1, image_2, image_3
559
+
560
+ # with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
561
+
562
+ # with gr.Row():
563
+ # with gr.Column():
564
+ # state = gr.State()
565
+
566
+ # chatbot = gr.Chatbot([], elem_id="RADAR:Channel 94.1", bubble_full_width=False)
567
+ # choice = gr.Radio(label="Select Style", choices=["Details", "Conversational"], value="Conversational")
568
+ # tts_choice = gr.Radio(label="Select TTS Model", choices=["ElevenLabs", "Parler TTS"], value="Parler TTS")
569
+
570
+ # gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
571
+ # chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
572
+ # chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
573
+ # bot_msg = chat_msg.then(bot, [chatbot, choice, tts_choice], [chatbot, gr.Audio(interactive=False, autoplay=True)])
574
+ # bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Ask Radar!!!...", show_label=False), None, [chat_input])
575
+ # chatbot.like(print_like_dislike, None, None)
576
+ # clear_button = gr.Button("Clear")
577
+ # clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
578
+
579
+
580
+ # audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
581
+ # audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="SAMLOne_real_time")
582
 
583
+ # # gr.Markdown("<h1 style='color: red;'>Map</h1>", elem_id="location-markdown")
584
+ # # location_output = gr.HTML()
585
+ # # bot_msg.then(show_map_if_details, [chatbot, choice], [location_output, location_output])
586
+
587
+ # # with gr.Column():
588
+ # # weather_output = gr.HTML(value=fetch_local_weather())
589
+ # # news_output = gr.HTML(value=fetch_local_news())
590
+ # # news_output = gr.HTML(value=fetch_local_events())
591
+
592
+ # with gr.Column():
593
+
594
+ # image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
595
+ # image_output_2 = gr.Image(value=generate_image(hardcoded_prompt_2), width=400, height=400)
596
+ # image_output_3 = gr.Image(value=generate_image(hardcoded_prompt_3), width=400, height=400)
597
 
 
 
598
 
599
+ # refresh_button = gr.Button("Refresh Images")
600
+ # refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3])
601
+
602
+ # demo.queue()
603
+ # demo.launch(share=True)
604
+
605
+ #### Modified -1 ####
606
 
607
+ import subprocess
608
+ import sys
609
  import gradio as gr
610
  import requests
611
  import os
 
623
  from gtts import gTTS
624
  from diffusers import StableDiffusion3Pipeline
625
  import soundfile as sf
 
626
  from langchain_openai import OpenAIEmbeddings, ChatOpenAI
627
  from langchain_pinecone import PineconeVectorStore
628
  from langchain.prompts import PromptTemplate
 
631
  from langchain.agents import Tool, initialize_agent
632
  from huggingface_hub import login
633
 
634
+ def install_parler_tts():
635
+     subprocess.check_call([sys.executable, "-m", "pip", "install", "git+https://github.com/huggingface/parler-tts.git"])
636
+
637
+ # Call the function to install parler-tts
638
+ install_parler_tts()
639
+
640
  # Check if the token is already set in the environment variables
641
  hf_token = os.getenv("HF_TOKEN")
642
 
 
899
  audio_path = audio_future.result()
900
  yield history, audio_path
901
 
 
902
  def add_message(history, message):
903
  history.append((message, None))
904
  return history, gr.Textbox(value="", interactive=True, placeholder="Enter message or upload file...", show_label=False)
 
1125
  input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
1126
  prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
1127
 
1128
+ max_input_length = model.config.n_positions - input_ids.shape[1]
1129
+ segments = [prompt_input_ids[0][i:i+max_input_length] for i in range(0, prompt_input_ids.shape[1], max_input_length)]
1130
+
1131
+ audio_segments = []
1132
+ for segment in segments:
1133
+     segment = segment.unsqueeze(0)
1134
+     generation = model.generate(input_ids=input_ids, prompt_input_ids=segment)
1135
+     audio_arr = generation.cpu().numpy().squeeze()
1136
+     audio_segments.append(audio_arr)
1137
+
1138
+ full_audio = np.concatenate(audio_segments)
1139
 
1140
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
1141
+ sf.write(f.name, full_audio, model.config.sampling_rate)
1142
  temp_audio_path = f.name
1143
 
1144
  logging.debug(f"Audio saved to {temp_audio_path}")