import os
import datetime

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from tavily import TavilyClient

# 🔹 Load environment variables from .env file
load_dotenv()

# 🔹 Retrieve API keys from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

if not OPENAI_API_KEY or not TAVILY_API_KEY:
    raise ValueError("❌ API keys are missing! Please check your .env file.")

# 🔹 Initialize the Tavily client and the chat model.
# Note: the base URL points at Groq's OpenAI-compatible endpoint, so the
# OPENAI_API_KEY variable must actually hold a Groq API key here.
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)

llm = ChatOpenAI(
    model="llama3-8b-8192",
    temperature=0,
    streaming=False,  # Streaming is controlled by Streamlit
    api_key=OPENAI_API_KEY,
    base_url="https://api.groq.com/openai/v1",
)


# 🔎 Web search function using the Tavily API
def search_web_with_tavily(query: str) -> str:
    if len(query) < 5:  # Ignore very short queries
        return ""
    print(f"🔍 Sending query to Tavily: {query}")
    search_results = tavily_client.search(query=query, max_results=3)
    # Extract and format the retrieved web results
    snippets = [
        f"{result['title']}: {result['content']}"
        for result in search_results["results"]
        if "content" in result
    ]
    print("✅ Web search results retrieved!")
    return "\n".join(snippets) if snippets else ""


# 📝 Prompt function for AI response generation
def prompt_fn(query: str, context: str, web_context: str = "") -> str:
    """Main prompt template for the AI assistant.

    The assistant must:
    - Prioritize university knowledge first.
    - Use web search only if internal knowledge is insufficient.
    - If no relevant information is found, respond with:
      "I’m sorry, but I don’t have information on this topic."
    - Avoid unnecessary introductions, greetings, or explanations.
    """
    # Include web search results only if available
    search_part = (
        f"\nAdditionally, I found the following information from the web:\n{web_context}\n"
        if web_context
        else ""
    )
    return f"""
Below is the available information for answering student inquiries about Vistula University.

🔹 Follow this order when answering:
1️⃣ **Use internal university knowledge first.**
2️⃣ **If internal data lacks relevant details, use web search results.**
3️⃣ **If no useful information is found, respond with: "I’m sorry, but I don’t have information on this topic."**

🔹 Important Rules:
- **Do not start with introductions.** Provide the answer directly.
- **If no information is available, do not add lengthy explanations.**
- **Never make up or guess information.**

🔹 Available Information:
{context}
{search_part}

🔹 Question: {query}

---
❗ **If no relevant information is found, simply say:**
- "I’m sorry, but I don’t have information on this topic."
"""


# 🔹 Define the AI pipeline (Prompt → LLM → Output Parsing)
prompt_runnable = RunnableLambda(
    lambda inputs: prompt_fn(inputs["query"], inputs["context"], inputs.get("web_context", ""))
)
rag_chain = prompt_runnable | llm | StrOutputParser()


# 🔥 Response generation function
def generate_response(retriever, query):
    # Handle greetings separately (any query of two words or fewer is
    # treated as too vague to answer)
    if len(query.split()) <= 2 or query.lower() in ["hi", "hello", "help", "hey", "merhaba"]:
        return (
            "👋 Hi there! How can I assist you today? "
            "Please ask me a specific question about Vistula University."
        )
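    # `retriever` is assumed to be any LangChain retriever (e.g. a FAISS or
    # Chroma VectorStoreRetriever) whose .invoke(query) returns a list of
    # Document objects exposing .page_content.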
    # Retrieve relevant documents from the knowledge base
    relevant_docs = retriever.invoke(query)
    context = "\n".join([doc.page_content for doc in relevant_docs])

    # Fall back to web search when the internal knowledge base has nothing
    # useful; give up only if the web search also comes back empty
    web_context = ""
    if not relevant_docs or len(context.strip()) < 20:
        web_context = search_web_with_tavily(query)
        if not web_context:
            return "I’m sorry, but I don’t have information on this topic."

    # Generate the response using the RAG chain
    inputs = {"query": query, "context": context, "web_context": web_context}
    response = rag_chain.invoke(inputs).strip()
    return response if response else "I’m sorry, but I don’t have information on this topic."


# 🔹 Logging function for tracking interactions
def log_interaction(question, answer, source):
    log_folder = "logs"
    os.makedirs(log_folder, exist_ok=True)  # Ensure the logs directory exists
    log_file = os.path.join(log_folder, "chat_log.txt")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")  # Add timestamp
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(f"{timestamp} | Question: {question}\n")  # Log the user question
        f.write(f"{timestamp} | Answer: {answer}\n")      # Log the AI response
        f.write(f"{timestamp} | Source: {source}\n")      # Data source (VectorStore/Web)
        f.write("-" * 80 + "\n")                          # Separator for readability
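
# --- Usage sketch ---
# A minimal example of wiring the pieces above together, assuming a FAISS
# index has already been built over the university documents. The index
# path ("vistula_index") and the embedding model are hypothetical; the
# embeddings must match whatever was used when the index was created.
if __name__ == "__main__":
    from langchain_community.vectorstores import FAISS
    from langchain_huggingface import HuggingFaceEmbeddings

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"  # assumed model
    )
    vectorstore = FAISS.load_local(
        "vistula_index",  # assumed path to a prebuilt index
        embeddings,
        allow_dangerous_deserialization=True,
    )
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    question = "What documents are required for admission?"
    answer = generate_response(retriever, question)
    print(answer)
    log_interaction(question, answer, source="VectorStore")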