from flask import Flask, render_template, request, jsonify
import faiss
import numpy as np
import json
from sentence_transformers import SentenceTransformer
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
import re
import os
from dotenv import load_dotenv

load_dotenv()

app = Flask(__name__)

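# Local all-MiniLM-L6-v2 embedding model and the prebuilt FAISS index of PDF chunk embeddings.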
model = SentenceTransformer('./sentence-transformers_all-MiniLM-L6-v2')
index = faiss.read_index("faiss_index.bin")

groq_api_key = os.getenv('GROQ_API_KEY')
model_name = "llama-3.3-70b-versatile"

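# Groq-hosted Llama 3.3 70B chat model; temperature=0 keeps answers consistent across runs.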
llm = ChatGroq(
    temperature=0,
    groq_api_key=groq_api_key,
    model_name=model_name
)

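# Chunk metadata saved during indexing; entry i describes the vector at position i in the FAISS index.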
with open("metadata.json") as f: |
|
metadata = json.load(f) |
|
|
|
|
|
def categorize_query(query):
    """
    Categorizes user queries into different types (greetings, small talk, unrelated, etc.).
    """
    query = query.lower().strip()

    greeting_patterns = [
        r"\bhello\b", r"\bhi\b", r"\bhey\b", r"\bhola\b", r"\bgreetings\b",
        r"\bwhat('s| is) up\b", r"\bhowdy\b", r"\bhiya\b", r"\byo\b",
        r"\bgood (morning|afternoon|evening|day|night)\b",
        r"\bhow (are|r) you\b", r"\bhow's it going\b", r"\bhow have you been\b",
        r"\bhope you are (doing )?(well|good|fine)\b", r"\bnice to meet you\b",
        r"\bpleased to meet you\b"
    ]

    thank_you_patterns = [
        r"\bthank(s| you)\b", r"\bthanks a lot\b", r"\bthanks so much\b",
        r"\bthank you very much\b", r"\bappreciate it\b", r"\bmuch obliged\b",
        r"\bgrateful\b", r"\bcheers\b"
    ]

    small_talk_patterns = [
        r"\bhow (are|r) you\b", r"\bhow's your day\b", r"\bwhat's up\b",
        r"\bhow's it going\b", r"\bhow have you been\b", r"\bhope you are well\b"
    ]

    unrelated_patterns = [
        r"\btell me a joke\b", r"\bwho won\b", r"\bwhat is ai\b", r"\bexplain blockchain\b"
    ]

    goodbye_patterns = [
        r"\bbye\b", r"\bgoodbye\b", r"\bsee you\b", r"\bhave a nice day\b"
    ]

    rude_patterns = [
        r"\bstupid\b", r"\bdumb\b", r"\buseless\b", r"\bshut up\b"
    ]

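    # The first matching category wins, so greetings take precedence over small talk
    # for overlapping phrases like "how are you".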
    if any(re.search(pattern, query) for pattern in greeting_patterns):
        return "greeting"
    if any(re.search(pattern, query) for pattern in thank_you_patterns):
        return "thank_you"
    if any(re.search(pattern, query) for pattern in small_talk_patterns):
        return "small_talk"
    if any(re.search(pattern, query) for pattern in unrelated_patterns):
        return "unrelated"
    if any(re.search(pattern, query) for pattern in goodbye_patterns):
        return "goodbye"
    if any(re.search(pattern, query) for pattern in rude_patterns):
        return "rude"

    return "normal"


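# Embed the query with the MiniLM model and return the metadata for the top_k nearest chunks.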
def search_text(query, top_k=2):
    query_embedding = np.array(model.encode(query, convert_to_numpy=True)).astype("float32").reshape(1, -1)
    distances, indices = index.search(query_embedding, top_k)

    results = []
    for idx in indices[0]:
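        # FAISS returns -1 for positions it cannot fill, so skip those.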
        if idx >= 0:
            results.append(metadata[idx])

    return results


@app.route("/") |
|
def home(): |
|
return render_template("index.html") |
|
|
|
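# Main chat endpoint: classify the query, retrieve matching PDF chunks, and ask the LLM to answer from them.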
@app.route("/query", methods=["POST"]) |
|
def query_pdf(): |
|
query = request.json.get("query") |
|
|
|
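    # Conversational queries get canned replies; only "normal" queries go through retrieval.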
    query_type = categorize_query(query)

    if query_type == "greeting":
        return jsonify({"text": "Hello! How can I assist you with Exelsys EasyHR?", "images": []})

    if query_type == "thank_you":
        return jsonify({"text": "You're welcome! How can I assist you further?", "images": []})

    if query_type == "small_talk":
        return jsonify({"text": "I'm here to assist with Exelsys EasyHR. How can I help?", "images": []})

    if query_type == "unrelated":
        return jsonify({"text": "I'm here to assist with Exelsys easyHR queries only.", "images": []})

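    # Note: categorize_query() never returns "vague" at the moment, so this branch is a
    # placeholder for a future vagueness check.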
if query_type == "vague": |
|
return jsonify({"text": "Could you please provide more details?", "images": []}) |
|
|
|
if query_type == "goodbye": |
|
return jsonify({"text": "You're welcome! Have a great day!", "images": []}) |
|
|
|
if query_type == "rude": |
|
return jsonify({"text": "I'm here to assist you professionally.", "images": []}) |
|
|
|
|
|
|
|
|
|
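    # Retrieve the three chunks most similar to the query to ground the answer.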
    results = search_text(query, top_k=3)

    if not results:
        return jsonify({"text": "No relevant results found in the PDF.", "images": []})

    retrieved_text = "\n\n---\n\n".join([res["text"] for res in results])
    print(retrieved_text)

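    # Prompt that restricts the model to the retrieved PDF context.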
    prompt_extract = PromptTemplate.from_template(
        """
        ### YOU ARE AN EXELSYS EASYHR GUIDE ASSISTANT:
        ### INSTRUCTIONS:
        - Your job is to provide step-by-step guidance for the following user query based on the provided context.
        - Base your response **only** on the retrieved context from the PDF.
        - If no relevant information is found, simply respond with: "Not found."
        - If the user greets you (e.g., "Hello", "Hi", "Good morning"), respond politely but keep it brief.
        - If the query is unrelated to Exelsys easyHR, respond with: "I'm here to assist with Exelsys easyHR queries only."
        - Provide clear and concise answers.
        - Include any links that appear inside a topic as HTML <a> tags.

        ### USER QUERY:
        {query}

        ### CONTEXT FROM PDF:
        {retrieved_text}

        ### ANSWER:
        """
    )

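    # LCEL pipe: the filled-in prompt is passed straight to the Groq chat model.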
    chain_extract = prompt_extract | llm
    chat_response = chain_extract.invoke({"query": query, "retrieved_text": retrieved_text})

    response_text = str(chat_response.content)

    response = {
        "text": response_text,
        "images": []
    }
    print(response)

    return jsonify(response)


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)