# File size: 6,342 Bytes
# 77c8f74 670a9d5 c85bec4 77c8f74 c85bec4 77c8f74
# (Line-number gutter 1-200 from the web file viewer omitted; it is not part of the program.)
from flask import Flask, render_template, request, jsonify
import faiss
import numpy as np
import json
from sentence_transformers import SentenceTransformer
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
import re
import faiss
import numpy as np
import json
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv
import fitz # PyMuPDF for text extraction
from pdf2image import convert_from_path
import json
import os
# Flask application object; the view functions in this file register on it.
app = Flask(__name__)

# Sentence-embedding model loaded from a local directory, and the FAISS
# vector index read from disk.
model = SentenceTransformer('./sentence-transformers_all-MiniLM-L6-v2')
index = faiss.read_index("./faiss_index.bin")

# NOTE(review): load_dotenv is imported but never called in the visible code,
# so GROQ_API_KEY must already be present in the process environment.
groq_api_key = os.getenv('GROQ_API_KEY')
model_name = "llama-3.3-70b-versatile"

# NOTE(review): the argument list of this call was truncated in the source
# ("llm = ChatGroq(" with nothing after it). The two arguments below are the
# minimum implied by the variables defined just above -- confirm whether
# further options (e.g. temperature) were originally passed.
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name=model_name,
)

# Metadata loaded from JSON; the query route reads a "text" key and an
# optional "images" key from each entry returned by the search.
with open("./metadata.json", encoding="utf-8") as f:
    metadata = json.load(f)
# Ordered (category, compiled patterns) table. Order matters: the first
# category with any matching pattern wins, so "greeting" shadows the
# overlapping "small_talk" patterns exactly as the original if-chain did.
_QUERY_CATEGORIES = tuple(
    (category, tuple(re.compile(pattern) for pattern in patterns))
    for category, patterns in (
        # Greetings
        ("greeting", (
            r"\bhello\b", r"\bhi\b", r"\bhey\b", r"\bhola\b", r"\bgreetings\b",
            r"\bwhat('s| is) up\b", r"\bhowdy\b", r"\bhiya\b", r"\byo\b",
            r"\bgood (morning|afternoon|evening|day|night)\b",
            r"\bhow (are|r) you\b", r"\bhow's it going\b", r"\bhow have you been\b",
            r"\bhope you are (doing )?(well|good|fine)\b", r"\bnice to meet you\b",
            r"\bpleased to meet you\b",
        )),
        # Thank-you messages
        ("thank_you", (
            r"\bthank(s| you)\b", r"\bthanks a lot\b", r"\bthanks so much\b",
            r"\bthank you very much\b", r"\bappreciate it\b", r"\bmuch obliged\b",
            r"\bgrateful\b", r"\bcheers\b",
        )),
        # Small talk
        ("small_talk", (
            r"\bhow (are|r) you\b", r"\bhow's your day\b", r"\bwhat's up\b",
            r"\bhow's it going\b", r"\bhow have you been\b", r"\bhope you are well\b",
        )),
        # Unrelated topics
        ("unrelated", (
            r"\btell me a joke\b", r"\bwho won\b", r"\bwhat is ai\b",
            r"\bexplain blockchain\b",
        )),
        # Goodbye messages
        ("goodbye", (
            r"\bbye\b", r"\bgoodbye\b", r"\bsee you\b", r"\bhave a nice day\b",
        )),
        # Rude or inappropriate messages
        ("rude", (
            r"\bstupid\b", r"\bdumb\b", r"\buseless\b", r"\bshut up\b",
        )),
    )
)


def categorize_query(query):
    """Categorize a user query (greeting, small talk, unrelated, etc.).

    The query is lower-cased and stripped, then tested against each category's
    regular expressions in a fixed order; the first category with a match is
    returned.

    Args:
        query: Raw user message. Anything that is not a non-blank string is
            treated as carrying no usable content.

    Returns:
        One of "greeting", "thank_you", "small_talk", "unrelated", "goodbye",
        "rude", "vague" (missing or blank input) or "normal" (no pattern
        matched).
    """
    # A missing or blank query cannot be matched or searched; report it as
    # "vague" instead of raising AttributeError on None.
    if not isinstance(query, str):
        return "vague"
    normalized = query.lower().strip()
    if not normalized:
        return "vague"

    for category, patterns in _QUERY_CATEGORIES:
        if any(pattern.search(normalized) for pattern in patterns):
            return category
    return "normal"
# Function to Search for Relevant Answers
def search_text(query, top_k=2):
    """Return the metadata entries closest to ``query`` in the FAISS index.

    Args:
        query: Text to embed with the module-level sentence-transformer model.
        top_k: Number of nearest neighbours to request from the index.

    Returns:
        A list of metadata entries, one per valid hit, in the order the index
        returned them. Empty when nothing usable was found.
    """
    # The index is searched with a single float32 row vector of shape (1, dim).
    query_embedding = np.array(
        model.encode(query, convert_to_numpy=True)
    ).astype("float32").reshape(1, -1)
    _distances, indices = index.search(query_embedding, top_k)

    results = []
    for idx in indices[0]:
        # Negative ids mark "no neighbour" slots and must be skipped.
        if idx < 0:
            continue
        position = int(idx)  # numpy integer -> plain int for indexing
        # NOTE(review): the statement under "if idx >= 0:" was lost in the
        # source. It presumably appended metadata[idx]; the lookup below
        # accepts either a JSON array or a JSON object keyed by the stringified
        # id -- confirm against how metadata.json is generated.
        if isinstance(metadata, dict):
            entry = metadata.get(str(position))
        elif position < len(metadata):
            entry = metadata[position]
        else:
            entry = None
        if entry is not None:
            results.append(entry)
    return results
# Serve HTML Page
# NOTE(review): the route decorator was missing from the source, which left
# this view unregistered; "/" is assumed from the "Serve HTML Page" comment.
@app.route("/")
def home():
    """Render the ``index.html`` template."""
    return render_template("index.html")
# Fixed replies for query categories that never reach retrieval or the LLM.
_CANNED_REPLIES = {
    "greeting": "Hello! How can I assist you with Exelsys EasyHR?",
    "thank_you": "You're welcome! How can I assist you further?",
    "small_talk": "I'm here to assist with Exelsys EasyHR. How can I help?",
    "unrelated": "I'm here to assist with Exelsys easyHR queries only.",
    "vague": "Could you please provide more details?",
    "goodbye": "You're welcome! Have a great day!",
    "rude": "I'm here to assist you professionally.",
}

# NOTE(review): the template's quotes and its placeholder sections were lost in
# the source; only the five instruction bullets survived. The "{query}" and
# "{retrieved_text}" placeholders are required by the invoke() call below; the
# section headings around them are a reconstruction -- confirm the wording.
_PROMPT_EXTRACT = PromptTemplate.from_template(
    """
    ### USER QUERY:
    {query}

    ### RETRIEVED CONTEXT FROM PDF:
    {retrieved_text}

    ### INSTRUCTIONS:
    - Your job is to provide step-by-step guidance for the following user query.
    - Base your response **only** on the retrieved context from the PDF.
    - If no relevant information is found, simply respond with: "Not found."
    - If the user greets you (e.g., "Hello", "Hi", "Good morning"), respond politely but keep it brief.
    - If the query is unrelated to Exelsys easyHR, respond with: "I'm here to assist with Exelsys easyHR queries only."

    ### RESPONSE:
    """
)


@app.route("/query", methods=["POST"])
def query_pdf():
    """Answer a user query posted as JSON ``{"query": "..."}``.

    Conversational queries (greetings, thanks, etc.) get a canned reply.
    Anything else is answered by the LLM from the text of the entries returned
    by ``search_text``.

    Returns:
        A JSON response with a "text" string and an "images" list. Images from
        the retrieved entries are included only when the LLM did not answer
        with one of its refusal phrases.
    """
    # Tolerate a missing, malformed or non-object JSON body instead of raising.
    payload = request.get_json(silent=True)
    query = payload.get("query") if isinstance(payload, dict) else None
    if not isinstance(query, str) or not query.strip():
        return jsonify({"text": _CANNED_REPLIES["vague"], "images": []})

    query_type = categorize_query(query)
    canned_reply = _CANNED_REPLIES.get(query_type)
    if canned_reply is not None:
        return jsonify({"text": canned_reply, "images": []})

    # Search for relevant PDF content using FAISS
    results = search_text(query, top_k=3)
    if not results:
        return jsonify({"text": "No relevant results found in the PDF.", "images": []})

    # Merge multiple text results
    retrieved_text = "\n\n---\n\n".join(res["text"] for res in results)

    # Chain the prompt with ChatGroq
    chain_extract = _PROMPT_EXTRACT | llm
    chat_response = chain_extract.invoke(
        {"query": query, "retrieved_text": retrieved_text}
    )

    # Convert response to string
    response_text = str(chat_response.content)

    # Attach images only when the model actually answered: both phrases below
    # are the refusal wordings the prompt tells the model to use.
    retrieved_images = []
    if "Not found." not in response_text and "I'm here to assist" not in response_text:
        retrieved_images = [
            img for res in results if "images" in res for img in res["images"]
        ]

    # Final response JSON
    response = {
        "text": response_text,
        "images": retrieved_images,
    }
    return jsonify(response)
if __name__ == "__main__":
    # NOTE(review): the call was garbled in the source (only '"", port=7860)'
    # survived). Binding to all interfaces is assumed from the leftover quoted
    # host argument -- confirm, and restrict the host if this must not be
    # reachable from other machines.
    app.run(host="0.0.0.0", port=7860)