from flask import Flask, render_template, request, jsonify
import faiss
import numpy as np
import json
from sentence_transformers import SentenceTransformer
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
import re
from langchain_openai import AzureChatOpenAI
import os
from dotenv import load_dotenv
load_dotenv()
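# Expects the following variables in the environment (or a .env file):
#   AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_KEY, OPENAI_API_VERSION, DEPLOYMENT_NAME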

app = Flask(__name__)

# Azure OpenAI settings
AzureOpenAiEndpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
AzureOpenAiKey = os.getenv("AZURE_OPENAI_KEY")
OpenaiApiVersion = os.getenv("OPENAI_API_VERSION")
DeploymentName = os.getenv("DEPLOYMENT_NAME")

# Load the embedding model and FAISS index

model = SentenceTransformer('./sentence-transformers_all-MiniLM-L6-v2')
index = faiss.read_index("faiss_index.bin")
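# NOTE: faiss_index.bin is assumed to have been built with embeddings from the same
# all-MiniLM-L6-v2 model loaded above; a mismatched model would make searches meaningless.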
# groq_api_key = os.getenv('GROQ_API_KEY')
# model_name = "llama-3.3-70b-versatile"

llm = AzureChatOpenAI(
    openai_api_version=OpenaiApiVersion,  # API version of the Azure deployment
    deployment_name=DeploymentName,       # model deployment name in Azure
    openai_api_key=AzureOpenAiKey,
    azure_endpoint=AzureOpenAiEndpoint,
)
# llm = ChatGroq(
#             temperature=0, 
#             groq_api_key=groq_api_key, 
#             model_name=model_name
#         )

with open("metadata.json") as f:
    metadata = json.load(f)
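# metadata.json is expected to map each FAISS vector position back to its source PDF
# chunk; each entry should provide at least a "text" field (used in search_text below).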


def categorize_query(query):
    """
    Categorizes user queries into different types (greetings, small talk, unrelated, etc.).
    """
    query = query.lower().strip()
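    # Each category is matched with word-boundary regexes so short tokens like "hi"
    # do not fire inside longer words (e.g., "history").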

    # Greetings
    greeting_patterns = [
        r"\bhello\b", r"\bhi\b", r"\bhey\b", r"\bhola\b", r"\bgreetings\b",
        r"\bwhat('s| is) up\b", r"\bhowdy\b", r"\bhiya\b", r"\byo\b",
        r"\bgood (morning|afternoon|evening|day|night)\b",
        r"\bhow (are|r) you\b", r"\bhow's it going\b", r"\bhow have you been\b",
        r"\bhope you are (doing )?(well|good|fine)\b", r"\bnice to meet you\b",
        r"\bpleased to meet you\b"
    ]

    # Thank-you messages
    thank_you_patterns = [
        r"\bthank(s| you)\b", r"\bthanks a lot\b", r"\bthanks so much\b",
        r"\bthank you very much\b", r"\bappreciate it\b", r"\bmuch obliged\b",
        r"\bgrateful\b", r"\bcheers\b"
    ]

    # Small talk
    small_talk_patterns = [
        r"\bhow (are|r) you\b", r"\bhow's your day\b", r"\bwhat's up\b",
        r"\bhow's it going\b", r"\bhow have you been\b", r"\bhope you are well\b"
    ]

    # Unrelated topics
    unrelated_patterns = [
        r"\btell me a joke\b", r"\bwho won\b", r"\bwhat is ai\b", r"\bexplain blockchain\b"
    ]


    # Goodbye messages
    goodbye_patterns = [
        r"\bbye\b", r"\bgoodbye\b", r"\bsee you\b", r"\bhave a nice day\b"
    ]

    # Rude or inappropriate messages
    rude_patterns = [
        r"\bstupid\b", r"\bdumb\b", r"\buseless\b", r"\bshut up\b"
    ]

    if any(re.search(pattern, query) for pattern in greeting_patterns):
        return "greeting"
    if any(re.search(pattern, query) for pattern in thank_you_patterns):
        return "thank_you"
    if any(re.search(pattern, query) for pattern in small_talk_patterns):
        return "small_talk"
    if any(re.search(pattern, query) for pattern in unrelated_patterns):
        return "unrelated"
    if any(re.search(pattern, query) for pattern in goodbye_patterns):
        return "goodbye"
    if any(re.search(pattern, query) for pattern in rude_patterns):
        return "rude"

    return "normal"

# Function to Search for Relevant Answers
def search_text(query, top_k=2):
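    """
    Embed the query with the sentence-transformer model and return the metadata
    entries for the top_k nearest vectors in the FAISS index.
    """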
    query_embedding = np.array(model.encode(query, convert_to_numpy=True)).astype("float32").reshape(1, -1)
    distances, indices = index.search(query_embedding, top_k)

    results = []
    for idx in indices[0]:
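        # FAISS pads the result with -1 when fewer than top_k vectors are available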
        if idx >= 0:
            results.append(metadata[idx])
    
    return results

# Serve HTML Page
@app.route("/")
def home():
    return render_template("index.html")

@app.route("/query", methods=["POST"])
def query_pdf():
    query = request.json.get("query", "")
    if not query.strip():
        return jsonify({"text": "Please enter a question about Exelsys easyHR.", "images": []})

    query_type = categorize_query(query)

    if query_type == "greeting":
        return jsonify({"text": "Hello! How can I assist you with Exelsys EasyHR?", "images": []})

    if query_type == "thank_you":
        return jsonify({"text": "You're welcome! How can I assist you further?", "images": []})

    if query_type == "small_talk":
        return jsonify({"text": "I'm here to assist with Exelsys EasyHR. How can I help?", "images": []})

    if query_type == "unrelated":
        return jsonify({"text": "I'm here to assist with Exelsys easyHR queries only.", "images": []})

    if query_type == "vague":
        return jsonify({"text": "Could you please provide more details?", "images": []})

    if query_type == "goodbye":
        return jsonify({"text": "You're welcome! Have a great day!", "images": []})

    if query_type == "rude":
        return jsonify({"text": "I'm here to assist you professionally.", "images": []})

    # Search for relevant PDF content using FAISS
    results = search_text(query, top_k=3)

    if not results:
        return jsonify({"text": "No relevant results found in the PDF.", "images": []})

    # Merge multiple text results
    retrieved_text = "\n\n---\n\n".join([res["text"] for res in results])
    print(retrieved_text)
    
    prompt_extract = PromptTemplate.from_template(
        """
        ### YOU ARE AN EXELSYS EASYHR GUIDE ASSISTANT:
        ### INSTRUCTIONS:
        - Your job is to provide step-by-step guidance for the user query below.
        - Check the user query and provide relevant information from the Exelsys easyHR PDF.
        - Only include image paths that are relevant to both your response and the user query.
        - Format the text response as HTML steps; do not wrap image paths in HTML tags and do not use large heading tags.
        - Do not add questions to the response.
        - Base your response **only** on the retrieved context from the PDF.
        - If no relevant information is found, simply respond with: "Not found."
        - If the user greets you (e.g., "Hello", "Hi", "Good morning"), respond politely but keep it brief.
        - If the query is unrelated to Exelsys easyHR, respond with: "I'm here to assist with Exelsys easyHR queries only."
        
        Each step may have an associated image that should be placed **immediately after** the step description.

        ### **Formatting Rules:**
        1. Each step should be numbered (`1., 2., 3., etc.`).
        2. If a step has an associated image, include the image tag **right after** the step description.
        3. Do NOT list all images at the bottom; place them **inline** with their respective steps.
        4. If no image is available for a step, simply move to the next step.

        ### Example Format:
        1. Step 1 Text (description of the step).
        <img src"static/output_images/step1.png" class="inline-image" onclick="expandImage(this)">
        
        2. Step 2 Text (description of the step).
        
        3. Step 3 Text (description of the step).
        <img src"static/output_images/step2.png" class="inline-image" onclick="expandImage(this)"> 
        
        ### USER QUERY:
        {query}

        ### CONTEXT FROM PDF:
        {retrieved_text}

        ### ANSWER:
        """
    )

    # Chain the prompt with the LLM (LCEL pipe syntax)
    chain_extract = prompt_extract | llm
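    # invoke() fills the {query} and {retrieved_text} placeholders and calls the model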
    chat_response = chain_extract.invoke({"query": query, "retrieved_text": retrieved_text})

    # Convert response to string
    response_text = str(chat_response.content)

    # Final response JSON (any image tags are embedded inline in the text)
    response = {
        "text": response_text
    }
    print(response)

    return jsonify(response)



if __name__ == "__main__":
    app.run(debug=True)