File size: 12,850 Bytes
9965e97 2ea6305 9965e97 2ea6305 9965e97 2ea6305 9965e97 2ea6305 9965e97 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 |
import logging
# Set up logging
from langchain_openai import OpenAIEmbeddings
import os
import re
import folium
import gradio as gr
import time
import requests
from googlemaps import Client as GoogleMapsClient
from gtts import gTTS
import tempfile
import string
embeddings = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])
from pinecone import Pinecone, ServerlessSpec
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
index_name = "omaha-details"
from langchain_pinecone import PineconeVectorStore
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
retriever = vectorstore.as_retriever(search_kwargs={'k': 5})
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.agents import Tool, initialize_agent
# Build prompt
# Prompt texts for the two answer styles. Each prompt is declared with the
# input variables "context" and "question", so each template must contain
# both placeholders: without `{context}` the retrieved documents are never
# rendered into the prompt and the model answers without them.
template1 = """You are an expert concierge who is helpful and a renowned guide for Omaha, Nebraska. Use the following pieces of context,
memory, and message history, along with your knowledge of perennial events in Omaha, Nebraska, to answer the question at the end.
If you don't know the answer, just say "Homie, I need to get more data for this," and don't try to make up an answer.
Use fifteen sentences maximum. Keep the answer as detailed as possible. Always include the address, time, date, and
event type and description. Always say "It was my pleasure!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
template2 = """You are an expert guide of Omaha, Nebraska's perennial events.
With the context, memory, and message history provided, answer the question in as crisp as possible. Always include the time, date, and
event type and description only apart from that don't give any other details. Always say "It was my pleasure!" at the end of the answer.
If you don't know the answer, simply say, "Homie, I need to get more data for this," without making up an answer.
{context}
Question: {question}
Helpful Answer:"""
# Prompt objects for the two answer styles; both declare the inputs
# "context" and "question".
QA_CHAIN_PROMPT_1 = PromptTemplate(
    template=template1,
    input_variables=["context", "question"],
)
QA_CHAIN_PROMPT_2 = PromptTemplate(
    template=template2,
    input_variables=["context", "question"],
)
# Chat model (gpt-4o, temperature 0) authenticated from OPENAI_API_KEY.
chat_model = ChatOpenAI(
    model='gpt-4o',
    temperature=0,
    api_key=os.environ['OPENAI_API_KEY'],
)
# Conversation memory object (a LangChain ConversationBufferWindowMemory).
# NOTE(review): the constructor arguments and the closing parenthesis are not
# visible in this view; restore them from the original source before running.
conversational_memory = ConversationBufferWindowMemory(
# Define the retrieval QA chain
# Build a RetrievalQA chain whose prompt is `prompt_template`, plus a tool
# list containing a 'Knowledge Base' entry, and return both as
# (qa_chain, tools).
# NOTE(review): this definition is incomplete in this view. The remaining
# arguments and closing parenthesis of `RetrievalQA.from_chain_type(`, the
# constructor call that owns `name=`/`description=`, and the closing bracket
# of `tools` are not visible; restore them from the original source.
def build_qa_chain(prompt_template):
qa_chain = RetrievalQA.from_chain_type(
# The caller-supplied prompt is forwarded to the underlying chain.
chain_type_kwargs={"prompt": prompt_template}
tools = [
name='Knowledge Base',
description='use this tool when answering general knowledge queries to get more information about the topic'
return qa_chain, tools
# Define the agent initializer
# Build the QA chain and tools for `prompt_template`, create an agent via
# `initialize_agent`, and return that agent.
# NOTE(review): the arguments and closing parenthesis of `initialize_agent(`
# are not visible in this view; restore them from the original source.
def initialize_agent_with_prompt(prompt_template):
qa_chain, tools = build_qa_chain(prompt_template)
agent = initialize_agent(
return agent
# Define the function to generate answers
def generate_answer(message, choice):
    """Answer ``message`` with the agent that matches the selected prompt style.

    Args:
        message: The user's question, passed to the agent unchanged.
        choice: ``"Details"`` selects ``QA_CHAIN_PROMPT_1`` and
            ``"Conversational"`` selects ``QA_CHAIN_PROMPT_2``. Any other value
            is logged as an error and handled like ``"Details"``.

    Returns:
        The ``'output'`` entry of the agent's response.
    """
    logging.debug(f"generate_answer called with prompt_choice: {choice}")
    if choice == "Details":
        agent = initialize_agent_with_prompt(QA_CHAIN_PROMPT_1)
    elif choice == "Conversational":
        agent = initialize_agent_with_prompt(QA_CHAIN_PROMPT_2)
    else:
        # Without this branch the error log and the fallback agent would run
        # for every request, overriding the agent selected above.
        logging.error(f"Invalid prompt_choice: {choice}. Defaulting to 'Details'")
        agent = initialize_agent_with_prompt(QA_CHAIN_PROMPT_1)
    response = agent(message)
    return response['output']
def bot(history, choice):
    """Stream the answer to the latest user message into the chat history.

    This is a generator. After each character of the answer is appended to the
    last history entry, the whole history is yielded so the UI can show the
    reply growing.

    Args:
        history: Chat history as a list of ``(user_message, bot_message)``
            pairs; the last pair holds the message to answer.
        choice: Prompt style forwarded to ``generate_answer``.

    Yields:
        The history list, once per character of the response.
    """
    if not history:
        # Inside a generator this simply ends the iteration without yielding.
        return history
    user_message = history[-1][0]
    response = generate_answer(user_message, choice)
    # The last entry may be an immutable tuple (add_message appends one), so
    # replace it with a mutable pair before growing the reply in place.
    history[-1] = [user_message, ""]
    for character in response:
        history[-1][1] += character
        yield history
def add_message(history, message):
    """Append the user's message to the history and reset the input box.

    Args:
        history: Chat history list; mutated in place.
        message: Text the user submitted.

    Returns:
        A pair of the updated history and a fresh, empty, interactive
        ``gr.Textbox`` to replace the input component.
    """
    pending_turn = (message, None)  # bot reply slot is empty for now
    history.append(pending_turn)
    cleared_box = gr.Textbox(
        value="",
        interactive=True,
        placeholder="Enter message or upload file...",
        show_label=False,
    )
    return history, cleared_box
def print_like_dislike(x: gr.LikeData):
    """Print the index, value and liked flag carried by a like/dislike event."""
    event_fields = (x.index, x.value, x.liked)
    print(*event_fields)
# Function to extract addresses from the chatbot's response
# Address patterns, tried in this order. They are compiled once at import
# time instead of on every call.
_ADDRESS_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'([A-Z].*,\sOmaha,\sNE\s\d{5})',
        r'(\d{4}\s.*,\sOmaha,\sNE\s\d{5})',
        r'([A-Z].*,\sNE\s\d{5})',
        r'([A-Z].*,.*\sSt,\sOmaha,\sNE\s\d{5})',
        r'([A-Z].*,.*\sStreets,\sOmaha,\sNE\s\d{5})',
        r'(\d{2}.*\sStreets)',
        r'([A-Z].*\s\d{2},\sOmaha,\sNE\s\d{5})',
    )
)


def extract_addresses(response):
    """Extract address-like substrings from a chatbot response.

    Args:
        response: The response text. ``None``, an empty string, or a
            non-string value yields an empty list instead of raising.

    Returns:
        list[str]: The matched substrings in pattern order, then text order.
        Each distinct substring appears once, because the patterns overlap and
        would otherwise report the same span several times.
    """
    if not response or not isinstance(response, str):
        return []
    matches = []
    for pattern in _ADDRESS_PATTERNS:
        matches.extend(pattern.findall(response))
    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(matches))
# Store all found addresses
all_addresses = []
# Map generation function using Google Maps Geocoding API
def generate_map(location_names):
    """Render a folium map of Omaha with a marker for every known address.

    Addresses in ``location_names`` that are not yet in the module-level
    ``all_addresses`` list are added to it. Every stored address is then
    geocoded with the Google Maps client and, when a result exists, marked on
    the map.

    Args:
        location_names: Iterable of address strings found in the latest
            response; ``None`` or an empty iterable adds nothing.

    Returns:
        str: The map's HTML representation.

    Raises:
        KeyError: If the ``GOOGLEMAPS_API_KEY`` environment variable is not set.
    """
    global all_addresses
    # Record the new addresses. Previously the parameter was ignored, so the
    # list stayed empty and the map never showed any location.
    for name in location_names or []:
        if name not in all_addresses:
            all_addresses.append(name)

    api_key = os.environ['GOOGLEMAPS_API_KEY']
    gmaps = GoogleMapsClient(key=api_key)
    m = folium.Map(location=[41.2565, -95.9345], zoom_start=12)
    for location_name in all_addresses:
        geocode_result = gmaps.geocode(location_name)
        if geocode_result:
            location = geocode_result[0]['geometry']['location']
            # Attach the coordinates to the map; they used to sit in a
            # dangling expression that was evaluated and then discarded.
            folium.Marker(
                [location['lat'], location['lng']],
                tooltip=location_name,
            ).add_to(m)
    map_html = m._repr_html_()
    return map_html
# Function to fetch local news
def fetch_local_news():
    """Fetch news headlines and render them as an HTML fragment.

    Returns:
        str: An ``<h2>`` heading followed by up to ten numbered headline
        paragraphs, or ``"<p>Failed to fetch local news</p>"`` when the request
        fails, the status is not 200, or the body is not valid JSON.

    Raises:
        KeyError: If the ``SERP_API`` environment variable is not set.
    """
    import html  # local import: only needed for escaping remote text

    failure_html = "<p>Failed to fetch local news</p>"
    api_key = os.environ['SERP_API']
    # NOTE(review): this URL has no scheme or host in this view; it looks
    # truncated. Confirm the full endpoint against the original source.
    url = f' headline&api_key={api_key}'
    try:
        # The timeout keeps a stalled request from blocking the caller forever.
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            return failure_html
        results = response.json().get("news_results", [])
    except (requests.exceptions.RequestException, ValueError):
        # Network errors and malformed JSON degrade to the failure message,
        # matching the error handling style of fetch_local_weather.
        return failure_html

    parts = ["<h2>Omaha Today Headline </h2>"]
    for index, result in enumerate(results[:10]):
        # Escape remote text so it cannot inject markup into the page.
        title = html.escape(str(result.get("title", "No title")))
        link = html.escape(str(result.get("link", "#")), quote=True)
        snippet = html.escape(str(result.get("snippet", "")))
        parts.append(
            f"<p>{index + 1}. <a href='{link}' target='_blank'>{title}</a><br>{snippet}</p>"
        )
    return "".join(parts)
# Function to fetch local events
def fetch_local_events():
    """Fetch local events and render them as an HTML fragment.

    Returns:
        str: An ``<h2>`` heading followed by one numbered paragraph per event
        (title, date, location and link), or ``"Failed to fetch local events"``
        when the request fails, the status is not 200, or the body is not
        valid JSON.

    Raises:
        KeyError: If the ``SERP_API`` environment variable is not set.
    """
    import html  # local import: only needed for escaping remote text

    failure_text = "Failed to fetch local events"
    api_key = os.environ['SERP_API']
    # NOTE(review): in this view the URL consists of the API key alone; it
    # looks truncated. Confirm the full endpoint against the original source.
    url = f'{api_key}'
    try:
        # The timeout keeps a stalled request from blocking the caller forever.
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            return failure_text
        events_results = response.json().get("events_results", [])
    except (requests.exceptions.RequestException, ValueError):
        return failure_text

    parts = ["<h2>Local Events </h2>"]
    for index, event in enumerate(events_results):
        # Escape remote text so it cannot inject markup into the page.
        title = html.escape(str(event.get("title", "No title")))
        date = html.escape(str(event.get("date", "No date")))
        location = html.escape(str(event.get("address", "No location")))
        link = html.escape(str(event.get("link", "#")), quote=True)
        # Each entry now closes its <p>; it was previously left open.
        parts.append(
            f"<p>{index + 1}. {title}<br> Date: {date}<br> Location: {location}<br> "
            f"<a href='{link}' target='_blank'>Link :</a> <br></p>"
        )
    return "".join(parts)
# Function to fetch local weather
def fetch_local_weather():
    """Fetch current weather conditions and render them as an HTML fragment.

    Returns:
        str: An ``<h2>`` heading with temperature, condition and humidity
        paragraphs (``"N/A"`` for missing fields), or a
        ``"<p>Failed to fetch local weather: ...</p>"`` message naming the
        error type when the request or JSON decoding fails.

    Raises:
        KeyError: If the ``WEATHER_API`` environment variable is not set.
    """
    import html  # local import: only needed for escaping remote text

    api_key = os.environ['WEATHER_API']
    # NOTE(review): in this view the URL consists of the API key alone; it
    # looks truncated. Confirm the full endpoint against the original source.
    url = f'{api_key}'
    try:
        # The timeout keeps a stalled request from blocking the caller forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        jsonData = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # The exception text can contain the request URL and therefore the API
        # key, so only the error type is shown to the user.
        logging.error("Failed to fetch local weather: %s", type(e).__name__)
        return f"<p>Failed to fetch local weather: {type(e).__name__}</p>"

    current_conditions = jsonData.get("currentConditions", {})
    temp = html.escape(str(current_conditions.get("temp", "N/A")))
    condition = html.escape(str(current_conditions.get("conditions", "N/A")))
    humidity = html.escape(str(current_conditions.get("humidity", "N/A")))
    return "".join(
        (
            "<h2>Local Weather</h2>",
            f"<p>Temperature: {temp}°C</p>",
            f"<p>Condition: {condition}</p>",
            f"<p>Humidity: {humidity}%</p>",
        )
    )
# Voice Control
import numpy as np
import torch
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
# Speech-recognition setup: checkpoint id, device/dtype selection, model,
# processor, and the ASR pipeline used by the voice input.
model_id = 'openai/whisper-large-v3'
# Use the first CUDA device with float16 when CUDA is available; otherwise
# fall back to CPU with float32.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
# NOTE(review): the next call is cut off in this view; its remaining arguments
# and closing parenthesis are not visible. Restore them from the original
# source.
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype,
processor = AutoProcessor.from_pretrained(model_id)
# Optimized ASR pipeline
# It is built from the model plus the processor's tokenizer and feature
# extractor, processing audio in 15-second chunks with a batch size of 16.
pipe_asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, max_new_tokens=128, chunk_length_s=15, batch_size=16, torch_dtype=torch_dtype, device=device, return_timestamps=True)
# Base directory for audio files; not referenced elsewhere in the visible
# part of this file.
base_audio_drive = "/data/audio"
import numpy as np
def transcribe_function(stream, new_chunk):
    """Append a new audio chunk to the running stream and transcribe it all.

    Args:
        stream: Audio accumulated so far as a NumPy array, or ``None`` on the
            first chunk.
        new_chunk: An indexable pair ``(sampling_rate, samples)``.

    Returns:
        tuple: ``(stream, text, result)``, where ``stream`` is the updated
        audio array, ``text`` is the transcription of the whole stream and
        ``result`` is the raw pipeline output. If ``new_chunk`` is not
        indexable, the incoming ``stream`` is returned unchanged with ``""``
        and ``None``.
    """
    try:
        sr, y = new_chunk[0], new_chunk[1]
    except TypeError:
        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
        return stream, "", None

    y = y.astype(np.float32)
    # Normalise to peak amplitude 1. A silent or empty chunk has no peak, so
    # skip the division rather than dividing by zero and producing NaNs.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y = y / peak

    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
    full_text = result.get("text", "")
    return stream, full_text, result
# Map Retrieval Function for location finder
def update_map_with_response(history):
    """Build the map HTML for the addresses found in the latest bot reply.

    Args:
        history: Chat history as a list of ``(user_message, bot_message)``
            pairs.

    Returns:
        str: ``""`` when there is no history; otherwise the HTML returned by
        ``generate_map`` for the addresses extracted from the last reply.
    """
    if not history:
        return ""
    response = history[-1][1]
    # The reply slot is None until the bot has answered. Skip extraction in
    # that case instead of passing None to the regex search.
    addresses = extract_addresses(response) if response else []
    return generate_map(addresses)
def clear_textbox():
    """Return an empty string, i.e. the value of a cleared textbox."""
    cleared_value = ""
    return cleared_value
# Gradio Blocks interface
# Gradio Blocks layout and event wiring for the app.
# NOTE(review): indentation is lost in this view, so the nesting of the rows
# and columns and the extent of `setup_ui` cannot be determined here. The
# block may also continue past the last visible line.
with gr.Blocks(theme='rawrsor1/Everforest') as demo:
with gr.Row():
with gr.Column():
chatbot = gr.Chatbot([], elem_id="chatbot", bubble_full_width=False)
with gr.Column():
# The weather HTML is fetched once, when this component is constructed.
weather_output = gr.HTML(value=fetch_local_weather())
with gr.Column():
# The news HTML is fetched once, when this component is constructed.
news_output = gr.HTML(value=fetch_local_news())
def setup_ui():
state = gr.State()
with gr.Row():
with gr.Column():
gr.Markdown("Choose the prompt")
choice = gr.Radio(label="Choose a prompt", choices=["Details", "Conversational"], value="Details")
with gr.Column(): # Larger scale for the right column
gr.Markdown("Enter the query / Voice Output")
chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="Transcription")
# Submitting the textbox first records the message (add_message), then
# streams the bot's reply into the chatbot (bot).
chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
bot_msg = chat_msg.then(bot, [chatbot, choice], chatbot, api_name="bot_response")
# NOTE(review): the parentheses on the next line are unbalanced. It looks like
# two statements fused together, the second ending in `, None, None)`.
# Confirm against the original source.
bot_msg.then(lambda: gr.Textbox(value="", interactive=True, placeholder="Enter message or upload file...", show_label=False), None, [chat_input]), None, None)
# NOTE(review): the next line looks like the button construction fused with
# the tail of an event registration (`inputs=None, outputs=chat_input`).
# Confirm against the original source.
clear_button = gr.Button("Clear"), inputs=None, outputs=chat_input)
with gr.Column(): # Smaller scale for the left column
gr.Markdown("Stream your Voice")
# NOTE(review): the next line looks like the audio component construction
# fused with the tail of an event registration taking [state, audio_input]
# and writing [state, chat_input]. Confirm against the original source.
audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy'), inputs=[state, audio_input], outputs=[state, chat_input], api_name="SAMLOne_real_time")
with gr.Row():
with gr.Column():
gr.Markdown("Locate the Events")
location_output = gr.HTML()
# After the bot step completes, rebuild the map from the chat history.
bot_msg.then(update_map_with_response, chatbot, location_output)