# chatbot_v2.1 / rag_chain.py
import os
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from tavily import TavilyClient
from dotenv import load_dotenv
import datetime
# 🔹 Load environment variables from .env file
load_dotenv()
# 🔹 Retrieve API keys from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
if not OPENAI_API_KEY or not TAVILY_API_KEY:
    raise ValueError("❌ API keys are missing! Please check your .env file.")
# 🔹 Initialize the Tavily client and the chat model (Groq's OpenAI-compatible endpoint serving Llama 3)
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
llm = ChatOpenAI(
    model_name="llama3-8b-8192",
    temperature=0,
    streaming=False,  # Streaming is controlled by Streamlit
    openai_api_key=OPENAI_API_KEY,  # passed to the Groq endpoint configured below
    openai_api_base="https://api.groq.com/openai/v1"
)
# 🔎 Web search function using Tavily API
def search_web_with_tavily(query):
    if len(query) < 5:  # Ignore very short queries
        return ""
    print(f"🔍 Sending query to Tavily: {query}")
    search_results = tavily_client.search(query=query, max_results=3)
    # Extract and format the retrieved web results
    snippets = [
        f"{result.get('title', '')}: {result['content']}"
        for result in search_results["results"]
        if "content" in result
    ]
    print("✅ Web search results retrieved!")
    return "\n".join(snippets) if snippets else ""
# 📝 Prompt function for AI response generation
def prompt_fn(query: str, context: str, web_context: str = "") -> str:
"""
This is the main prompt template for the AI assistant.
The assistant must:
- Prioritize university knowledge first.
- Use web search only if internal knowledge is insufficient.
- If no relevant information is found, respond with:
"I’m sorry, but I don’t have information on this topic."
- Avoid unnecessary introductions, greetings, or explanations.
"""
# Include web search results only if available
search_part = f"\nAdditionally, I found the following information from the web:\n{web_context}\n" if web_context else ""
return f"""
Below is the available information for answering student inquiries about Vistula University.
🔹 Follow this order when answering:
1️⃣ **Use internal university knowledge first.**
2️⃣ **If internal data lacks relevant details, use web search results.**
3️⃣ **If no useful information is found, respond with: "I’m sorry, but I don’t have information on this topic."**
🔹 Important Rules:
- **Do not start with introductions.** Provide the answer directly.
- **If no information is available, do not add lengthy explanations.**
- **Never make up or guess information.**
🔹 Available Information:
{context}
{search_part}
🔹 Question:
{query}
---
❗ **If no relevant information is found, simply say:**
- "I’m sorry, but I don’t have information on this topic."
"""
# 🔹 Define the AI pipeline (Prompt → LLM → Output Parsing)
prompt_runnable = RunnableLambda(lambda inputs: prompt_fn(inputs["query"], inputs["context"], inputs.get("web_context", "")))
rag_chain = prompt_runnable | llm | StrOutputParser()
# 🔥 Response generation function
def generate_response(retriever, query):
    # Handle short greetings separately
    if len(query.split()) <= 2 or query.lower() in ["hi", "hello", "help", "hey", "merhaba"]:
        return "👋 Hi there! How can I assist you today? Please ask me a specific question about Vistula University."

    # Retrieve relevant documents from the knowledge base
    relevant_docs = retriever.invoke(query)
    context = "\n".join([doc.page_content for doc in relevant_docs])

    # If no useful data is found, return a short response
    if not relevant_docs or len(context.strip()) < 20:
        return "I’m sorry, but I don’t have information on this topic."

    # Generate the response with the RAG chain
    inputs = {"query": query, "context": context}
    response = rag_chain.invoke(inputs).strip()
    return response if response else "I’m sorry, but I don’t have information on this topic."
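
# ℹ️ Note: search_web_with_tavily() is defined above but generate_response() never calls it,
# so answers currently come from the vector store alone. The sketch below shows one possible
# way to wire the web fallback into the same chain; generate_response_with_web_fallback is a
# hypothetical name and not part of the original app.
def generate_response_with_web_fallback(retriever, query):
    """Sketch: answer from the vector store first, fall back to Tavily if the context is thin."""
    relevant_docs = retriever.invoke(query)
    context = "\n".join(doc.page_content for doc in relevant_docs)

    # Fall back to the web only when internal knowledge looks insufficient.
    web_context = ""
    if len(context.strip()) < 20:
        web_context = search_web_with_tavily(query)

    if not context.strip() and not web_context:
        return "I’m sorry, but I don’t have information on this topic."

    inputs = {"query": query, "context": context, "web_context": web_context}
    response = rag_chain.invoke(inputs).strip()
    return response if response else "I’m sorry, but I don’t have information on this topic."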
# 🔹 Logging function for tracking interactions
def log_interaction(question, answer, source):
    log_folder = "logs"
    os.makedirs(log_folder, exist_ok=True)  # Ensure the logs directory exists
    log_file = os.path.join(log_folder, "chat_log.txt")
    with open(log_file, "a", encoding="utf-8") as f:
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")  # Add timestamp
        f.write(f"{timestamp} | Question: {question}\n")  # Log the user question
        f.write(f"{timestamp} | Answer: {answer}\n")      # Log the AI response
        f.write(f"{timestamp} | Source: {source}\n")      # Indicate the data source (VectorStore/Web)
        f.write("-" * 80 + "\n")  # Separator for readability