# app.py

from flask import Flask, render_template, request, jsonify
import faiss
import numpy as np
import json
from sentence_transformers import SentenceTransformer
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
import re
import os
from dotenv import load_dotenv
load_dotenv()

app = Flask(__name__)

# Load the embedding model, FAISS index, and Groq-hosted LLM
model = SentenceTransformer('./sentence-transformers_all-MiniLM-L6-v2')
index = faiss.read_index("faiss_index.bin")
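# faiss_index.bin is assumed to have been built from embeddings produced by the
# same MiniLM model above; querying with a different model would make the
# distances meaningless.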
groq_api_key = os.getenv('GROQ_API_KEY')
model_name = "llama-3.3-70b-versatile"

llm = ChatGroq(
    temperature=0,
    groq_api_key=groq_api_key,
    model_name=model_name
)
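# temperature=0 keeps the answers as deterministic as possible, which suits
# step-by-step guidance drawn from a fixed manual.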

with open("metadata.json") as f:
    metadata = json.load(f)
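# metadata.json is assumed to be a list aligned with the FAISS index:
# entry i holds the chunk text (and any image references) for vector i.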


def categorize_query(query):
    """
    Categorizes user queries into different types (greetings, small talk, unrelated, etc.).
    """
    query = query.lower().strip()

    # Greetings
    greeting_patterns = [
        r"\bhello\b", r"\bhi\b", r"\bhey\b", r"\bhola\b", r"\bgreetings\b",
        r"\bwhat('s| is) up\b", r"\bhowdy\b", r"\bhiya\b", r"\byo\b",
        r"\bgood (morning|afternoon|evening|day|night)\b",
        r"\bhow (are|r) you\b", r"\bhow's it going\b", r"\bhow have you been\b",
        r"\bhope you are (doing )?(well|good|fine)\b", r"\bnice to meet you\b",
        r"\bpleased to meet you\b"
    ]

    # Thank-you messages
    thank_you_patterns = [
        r"\bthank(s| you)\b", r"\bthanks a lot\b", r"\bthanks so much\b",
        r"\bthank you very much\b", r"\bappreciate it\b", r"\bmuch obliged\b",
        r"\bgrateful\b", r"\bcheers\b"
    ]

    # Small talk
    small_talk_patterns = [
        r"\bhow (are|r) you\b", r"\bhow's your day\b", r"\bwhat's up\b",
        r"\bhow's it going\b", r"\bhow have you been\b", r"\bhope you are well\b"
    ]

    # Unrelated topics
    unrelated_patterns = [
        r"\btell me a joke\b", r"\bwho won\b", r"\bwhat is ai\b", r"\bexplain blockchain\b"
    ]

    # Goodbye messages
    goodbye_patterns = [
        r"\bbye\b", r"\bgoodbye\b", r"\bsee you\b", r"\bhave a nice day\b"
    ]

    # Rude or inappropriate messages
    rude_patterns = [
        r"\bstupid\b", r"\bdumb\b", r"\buseless\b", r"\bshut up\b"
    ]

    if any(re.search(pattern, query) for pattern in greeting_patterns):
        return "greeting"
    if any(re.search(pattern, query) for pattern in thank_you_patterns):
        return "thank_you"
    if any(re.search(pattern, query) for pattern in small_talk_patterns):
        return "small_talk"
    if any(re.search(pattern, query) for pattern in unrelated_patterns):
        return "unrelated"
    if any(re.search(pattern, query) for pattern in goodbye_patterns):
        return "goodbye"
    if any(re.search(pattern, query) for pattern in rude_patterns):
        return "rude"

    # Bare queries like "help" are too vague to search on (makes the "vague" branch in query_pdf reachable)
    if re.search(r"^(help|info|information|question|query)\s*\??$", query):
        return "vague"

    return "normal"

# Function to Search for Relevant Answers
def search_text(query, top_k=2):
    query_embedding = np.array(model.encode(query, convert_to_numpy=True)).astype("float32").reshape(1, -1)
    distances, indices = index.search(query_embedding, top_k)

    results = []
    for idx in indices[0]:
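        # FAISS pads the result with -1 when the index holds fewer than top_k
        # vectors, so skip those placeholder positions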
        if idx >= 0:
            results.append(metadata[idx])
    
    return results
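# Example: search_text("how do I reset my password?") embeds the query, looks
# up the 2 nearest chunks in the FAISS index, and returns their metadata
# entries (text and, where present, image references).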

# Serve HTML Page
@app.route("/")
def home():
    return render_template("index.html")

@app.route("/query", methods=["POST"])
def query_pdf():
    query = request.json.get("query")

    query_type = categorize_query(query)

    if query_type == "greeting":
        return jsonify({"text": "Hello! How can I assist you with Exelsys EasyHR?", "images": []})

    if query_type == "thank_you":
        return jsonify({"text": "You're welcome! How can I assist you further?", "images": []})

    if query_type == "small_talk":
        return jsonify({"text": "I'm here to assist with Exelsys EasyHR. How can I help?", "images": []})

    if query_type == "unrelated":
        return jsonify({"text": "I'm here to assist with Exelsys easyHR queries only.", "images": []})

    if query_type == "vague":
        return jsonify({"text": "Could you please provide more details?", "images": []})

    if query_type == "goodbye":
        return jsonify({"text": "You're welcome! Have a great day!", "images": []})

    if query_type == "rude":
        return jsonify({"text": "I'm here to assist you professionally.", "images": []})



    # Search for relevant PDF content using FAISS
    results = search_text(query, top_k=3)

    if not results:
        return jsonify({"text": "No relevant results found in the PDF.", "images": []})

    # Merge multiple text results
    retrieved_text = "\n\n---\n\n".join([res["text"] for res in results])
    print(retrieved_text)
    
    prompt_extract = PromptTemplate.from_template(
        """
        ### YOU ARE AN EXELSYS EASYHR GUIDE ASSISTANT:
        ### INSTRUCTIONS:
        - Your job is to provide step-by-step guidance for the following user query based on the provided context.
        - Base your response **only** on the retrieved context from the PDF.
        - If no relevant information is found, simply respond with: "Not found."
        - If the user greets you (e.g., "Hello", "Hi", "Good morning"), respond politely but keep it brief.
        - If the query is unrelated to Exelsys easyHR, respond with: "I'm here to assist with Exelsys easyHR queries only."
        - Provide clear and concise answers.
        - Include any links that appear in the context as HTML <a> tags.

        ### USER QUERY:
        {query}

        ### CONTEXT FROM PDF:
        {retrieved_text}

        ### ANSWER:
        """
    )

    # Chain the prompt with ChatGroq
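    # The | operator composes the prompt template and the chat model into a
    # LangChain runnable; invoke() fills the template and calls the Groq model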
    chain_extract = prompt_extract | llm
    chat_response = chain_extract.invoke({"query": query, "retrieved_text": retrieved_text})

    # Convert response to string
    response_text = str(chat_response.content)

    # Determine if images should be included
    # retrieved_images = []
    # if "Not found." not in response_text and "I'm here to assist" not in response_text:
    #     retrieved_images = [img for res in results if "images" in res for img in res["images"]]

    # Final response JSON ("images" is kept as an empty list so the payload
    # shape matches the other responses; swap in retrieved_images if the block
    # above is re-enabled)
    response = {
        "text": response_text,
        "images": []
    }
    print(response)

    return jsonify(response)

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
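# Example request once the server is running (assuming localhost):
#   curl -X POST http://localhost:7860/query \
#        -H "Content-Type: application/json" \
#        -d '{"query": "How do I submit a leave request?"}'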