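"""Flask app for a retrieval-augmented Q&A assistant over the Exelsys easyHR
guide: user queries are embedded with a local MiniLM model, matched against a
FAISS index of PDF chunks, and answered by an Azure OpenAI chat model."""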
from flask import Flask, render_template, request, jsonify
import faiss
import numpy as np
import json
from sentence_transformers import SentenceTransformer
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
import re
from langchain_openai import AzureChatOpenAI
import os
from dotenv import load_dotenv
load_dotenv()
app = Flask(__name__)
# Load Model and FAISS Index
AzureOpenAiEndpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
AzureOpenAiKey = os.getenv("AZURE_OPENAI_KEY")
OpenaiApiVersion = os.getenv("OPENAI_API_VERSION")
DeploymentName = os.getenv("DEPLOYMENT_NAME")
model = SentenceTransformer('./sentence-transformers_all-MiniLM-L6-v2')
index = faiss.read_index("faiss_index.bin")
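# NOTE: the FAISS index must have been built with the same embedding model;
# all-MiniLM-L6-v2 produces 384-dimensional vectors, so the index dimension
# must match or search will fail.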
# groq_api_key = os.getenv('GROQ_API_KEY')
# model_name = "llama-3.3-70b-versatile"
llm = AzureChatOpenAI(
    openai_api_version=OpenaiApiVersion,  # set to match your Azure API version
    deployment_name=DeploymentName,       # the model deployment name in Azure
    openai_api_key=AzureOpenAiKey,
    azure_endpoint=AzureOpenAiEndpoint,   # reuse the value loaded above
)
# llm = ChatGroq(
# temperature=0,
# groq_api_key=groq_api_key,
# model_name=model_name
# )
with open("metadata.json") as f:
metadata = json.load(f)
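# metadata.json is expected to hold one record per indexed vector, in the same
# order as the FAISS index, so a hit at position i maps to metadata[i].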
def categorize_query(query):
"""
Categorizes user queries into different types (greetings, small talk, unrelated, etc.).
"""
query = query.lower().strip()
# Greetings
greeting_patterns = [
r"\bhello\b", r"\bhi\b", r"\bhey\b", r"\bhola\b", r"\bgreetings\b",
r"\bwhat('s| is) up\b", r"\bhowdy\b", r"\bhiya\b", r"\byo\b",
r"\bgood (morning|afternoon|evening|day|night)\b",
r"\bhow (are|r) you\b", r"\bhow's it going\b", r"\bhow have you been\b",
r"\bhope you are (doing )?(well|good|fine)\b", r"\bnice to meet you\b",
r"\bpleased to meet you\b"
]
# Thank-you messages
thank_you_patterns = [
r"\bthank(s| you)\b", r"\bthanks a lot\b", r"\bthanks so much\b",
r"\bthank you very much\b", r"\bappreciate it\b", r"\bmuch obliged\b",
r"\bgrateful\b", r"\bcheers\b"
]
    # Small talk (note: patterns shared with greeting_patterns above will
    # already have matched as "greeting", since greetings are checked first)
small_talk_patterns = [
r"\bhow (are|r) you\b", r"\bhow's your day\b", r"\bwhat's up\b",
r"\bhow's it going\b", r"\bhow have you been\b", r"\bhope you are well\b"
]
# Unrelated topics
unrelated_patterns = [
r"\btell me a joke\b", r"\bwho won\b", r"\bwhat is ai\b", r"\bexplain blockchain\b"
]
# Goodbye messages
goodbye_patterns = [
r"\bbye\b", r"\bgoodbye\b", r"\bsee you\b", r"\bhave a nice day\b"
]
# Rude or inappropriate messages
rude_patterns = [
r"\bstupid\b", r"\bdumb\b", r"\buseless\b", r"\bshut up\b"
]
if any(re.search(pattern, query) for pattern in greeting_patterns):
return "greeting"
if any(re.search(pattern, query) for pattern in thank_you_patterns):
return "thank_you"
if any(re.search(pattern, query) for pattern in small_talk_patterns):
return "small_talk"
if any(re.search(pattern, query) for pattern in unrelated_patterns):
return "unrelated"
if any(re.search(pattern, query) for pattern in goodbye_patterns):
return "goodbye"
    if any(re.search(pattern, query) for pattern in rude_patterns):
        return "rude"
    # One-word queries that matched nothing above are treated as too vague to
    # search on; the /query route returns a clarification prompt for these.
    if len(query.split()) <= 1:
        return "vague"
    return "normal"
# Function to Search for Relevant Answers
def search_text(query, top_k=2):
query_embedding = np.array(model.encode(query, convert_to_numpy=True)).astype("float32").reshape(1, -1)
distances, indices = index.search(query_embedding, top_k)
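    # FAISS returns distances (metric depends on how the index was built) and
    # vector positions; a position of -1 means fewer than top_k neighbours exist.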
results = []
for idx in indices[0]:
if idx >= 0:
results.append(metadata[idx])
return results
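# e.g. search_text("add a new employee", top_k=3) returns up to three metadata
# records, each expected to carry at least a "text" field (see /query below).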
# Serve HTML Page
@app.route("/")
def home():
return render_template("index.html")
@app.route("/query", methods=["POST"])
def query_pdf():
    # Guard against missing/empty payloads instead of crashing on None
    query = (request.get_json(silent=True) or {}).get("query", "")
    if not query:
        return jsonify({"text": "Please enter a query.", "images": []})
query_type = categorize_query(query)
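    # Short-circuit with canned replies for non-substantive queries so we
    # don't run retrieval or call the LLM unnecessarily.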
if query_type == "greeting":
return jsonify({"text": "Hello! How can I assist you with Exelsys EasyHR?", "images": []})
if query_type == "thank_you":
return jsonify({"text": "You're welcome! How can I assist you further?", "images": []})
if query_type == "small_talk":
return jsonify({"text": "I'm here to assist with Exelsys EasyHR. How can I help?", "images": []})
if query_type == "unrelated":
return jsonify({"text": "I'm here to assist with Exelsys easyHR queries only.", "images": []})
if query_type == "vague":
return jsonify({"text": "Could you please provide more details?", "images": []})
if query_type == "goodbye":
return jsonify({"text": "You're welcome! Have a great day!", "images": []})
if query_type == "rude":
return jsonify({"text": "I'm here to assist you professionally.", "images": []})
# Search for relevant PDF content using FAISS
results = search_text(query, top_k=3)
if not results:
return jsonify({"text": "No relevant results found in the PDF.", "images": []})
# Merge multiple text results
retrieved_text = "\n\n---\n\n".join([res["text"] for res in results])
print(retrieved_text)
prompt_extract = PromptTemplate.from_template(
"""
### YOU ARE AN EXELSYS EASYHR GUIDE ASSISTANT:
### INSTRUCTIONS:
- Your job is to provide step-by-step guidance for the user query below.
- Check the user query and answer with the relevant information from the Exelsys easyHR PDF.
- Only include image paths that are relevant to both your response and the user query.
- Format the text response as numbered steps using HTML tags; leave image paths unformatted and avoid large heading tags.
- Do not add questions to the response.
- Base your response **only** on the retrieved context from the PDF.
- If no relevant information is found, simply respond with: "Not found."
- If the user greets you (e.g., "Hello", "Hi", "Good morning"), respond politely but keep it brief.
- If the query is unrelated to Exelsys easyHR, respond with: "I'm here to assist with Exelsys easyHR queries only."
Each step may have an associated image that should be placed **immediately after** the step description.
### **Formatting Rules:**
1. Each step should be numbered (`1., 2., 3., etc.`).
2. If a step has an associated image, include the Markdown image syntax **right after** the step description.
3. Do NOT list all images at the bottom; place them **inline** with their respective steps.
4. If no image is available for a step, simply move to the next step.
### Example Format:
1. Step 1 Text (description of the step).
<img src"static/output_images/step1.png" class="inline-image" onclick="expandImage(this)">
2. Step 2 Text (description of the step).
3. Step 3 Text (description of the step).
<img src"static/output_images/step2.png" class="inline-image" onclick="expandImage(this)">
### USER QUERY:
{query}
### CONTEXT FROM PDF:
{retrieved_text}
### ANSWER:
"""
)
    # Chain the prompt with the active LLM (AzureChatOpenAI here; the ChatGroq
    # alternative is kept commented out above)
chain_extract = prompt_extract | llm
chat_response = chain_extract.invoke({"query": query, "retrieved_text": retrieved_text})
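    # .invoke fills the {query} and {retrieved_text} slots in the template and
    # returns a chat message whose .content holds the model's answer.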
# Convert response to string
response_text = str(chat_response.content)
# Final response JSON
    response = {
        "text": response_text  # image tags, if any, are embedded inline in this HTML
    }
print(response)
return jsonify(response)
if __name__ == "__main__":
app.run(debug=True)