import os
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from tavily import TavilyClient
from dotenv import load_dotenv
import datetime

# 🔹 Load environment variables from .env file
load_dotenv()

# 🔹 Retrieve API keys from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

if not OPENAI_API_KEY or not TAVILY_API_KEY:
    raise ValueError("❌ API keys are missing! Please check your .env file.")
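
# An illustrative .env layout this module expects (values are placeholders,
# not real keys). Note that the Groq key goes under OPENAI_API_KEY because
# the chat model below is served through Groq's OpenAI-compatible endpoint:
#
#   OPENAI_API_KEY=<your-groq-api-key>
#   TAVILY_API_KEY=<your-tavily-api-key>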

# 🔹 Initialize the Tavily client and the chat model
# Note: the model is served by Groq through its OpenAI-compatible endpoint,
# so the Groq API key is read from the OPENAI_API_KEY variable.
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)

llm = ChatOpenAI(
    model="llama3-8b-8192",
    temperature=0,
    streaming=False,  # Streaming is controlled by Streamlit
    api_key=OPENAI_API_KEY,
    base_url="https://api.groq.com/openai/v1",
)

# 🔎 Web search function using Tavily API
def search_web_with_tavily(query):
    if len(query) < 5:  # Ignore very short queries
        return ""

    print(f"🔍 Sending query to Tavily: {query}")
    try:
        search_results = tavily_client.search(query=query, max_results=3)
    except Exception as e:
        # Network or API errors should degrade gracefully, not crash the app
        print(f"⚠️ Tavily search failed: {e}")
        return ""

    # Extract and format the retrieved web results, skipping malformed entries
    snippets = [
        f"{result.get('title', '')}: {result['content']}"
        for result in search_results.get("results", [])
        if "content" in result
    ]

    print("✅ Web search results retrieved!")
    return "\n".join(snippets) if snippets else ""

# 📝 Prompt function for AI response generation
def prompt_fn(query: str, context: str, web_context: str = "") -> str:
    """
    Main prompt template for the AI assistant.

    The assistant must:
    - Prioritize university knowledge first.
    - Use web search only if internal knowledge is insufficient.
    - If no relevant information is found, respond with:
      "I’m sorry, but I don’t have information on this topic."
    - Avoid unnecessary introductions, greetings, or explanations.
    """
    # Include web search results only if available
    search_part = (
        f"\nAdditionally, I found the following information from the web:\n{web_context}\n"
        if web_context
        else ""
    )

    return f"""Below is the available information for answering student inquiries about Vistula University.

🔹 Follow this order when answering:
1️⃣ **Use internal university knowledge first.**
2️⃣ **If internal data lacks relevant details, use web search results.**
3️⃣ **If no useful information is found, respond with: "I’m sorry, but I don’t have information on this topic."**

🔹 Important Rules:
- **Do not start with introductions.** Provide the answer directly.
- **If no information is available, do not add lengthy explanations.**
- **Never make up or guess information.**

🔹 Available Information:
{context}
{search_part}
🔹 Question:
{query}

---
❗ **If no relevant information is found, simply say:**
- "I’m sorry, but I don’t have information on this topic."
"""

# 🔹 Define the AI pipeline (Prompt → LLM → Output Parsing)
prompt_runnable = RunnableLambda(lambda inputs: prompt_fn(inputs["query"], inputs["context"], inputs.get("web_context", "")))
rag_chain = prompt_runnable | llm | StrOutputParser()
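
# Illustrative direct invocation of the chain (normally wrapped by
# generate_response below; the values here are placeholders):
#   rag_chain.invoke({"query": "What programs are offered?",
#                     "context": "<retrieved documents>",
#                     "web_context": ""})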

# 🔥 Response generation function
def generate_response(retriever, query):
    # Handle short greetings separately
    if len(query.split()) <= 2 or query.lower().strip() in ["hi", "hello", "help", "hey", "merhaba"]:
        return "👋 Hi there! How can I assist you today? Please ask me a specific question about Vistula University."

    # Retrieve relevant documents from the knowledge base
    relevant_docs = retriever.invoke(query)
    context = "\n".join([doc.page_content for doc in relevant_docs])

    # If internal knowledge is insufficient, fall back to a web search
    # (the prompt template already supports a web_context section)
    web_context = ""
    if not relevant_docs or len(context.strip()) < 20:
        web_context = search_web_with_tavily(query)
        if not web_context:
            return "I’m sorry, but I don’t have information on this topic."

    # Generate response using AI
    inputs = {"query": query, "context": context, "web_context": web_context}
    response = rag_chain.invoke(inputs).strip()

    return response if response else "I’m sorry, but I don’t have information on this topic."

# 🔹 Logging function for tracking interactions
def log_interaction(question, answer, source):
    log_folder = "logs"
    os.makedirs(log_folder, exist_ok=True)  # Ensure logs directory exists

    log_file = os.path.join(log_folder, "chat_log.txt")

    with open(log_file, "a", encoding="utf-8") as f:
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")  # Add timestamp
        f.write(f"{timestamp} | Question: {question}\n")  # Log user question
        f.write(f"{timestamp} | Answer: {answer}\n")  # Log AI response
        f.write(f"{timestamp} | Source: {source}\n")  # Indicate data source (VectorStore/Web)
        f.write("-" * 80 + "\n")  # Separator for readability