from flask import Flask, render_template, request, jsonify
import faiss
import numpy as np
import json
import os
import re
from sentence_transformers import SentenceTransformer
from langchain.prompts import PromptTemplate
from langchain_openai import AzureChatOpenAI
from dotenv import load_dotenv

load_dotenv()

app = Flask(__name__)

# Azure OpenAI configuration from environment variables
AzureOpenAiEndpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
AzureOpenAiKey = os.getenv("AZURE_OPENAI_KEY")
OpenaiApiVersion = os.getenv("OPENAI_API_VERSION")
DeploymentName = os.getenv("DEPLOYMENT_NAME")

# Load the embedding model and the FAISS index
model = SentenceTransformer('./sentence-transformers_all-MiniLM-L6-v2')
index = faiss.read_index("faiss_index.bin")

llm = AzureChatOpenAI(
    openai_api_version=OpenaiApiVersion,  # change based on your API version
    deployment_name=DeploymentName,       # the model deployment name in Azure
    openai_api_key=AzureOpenAiKey,
    azure_endpoint=AzureOpenAiEndpoint,
)

# Alternative backend: Groq
# from langchain_groq import ChatGroq
# groq_api_key = os.getenv('GROQ_API_KEY')
# model_name = "llama-3.3-70b-versatile"
# llm = ChatGroq(
#     temperature=0,
#     groq_api_key=groq_api_key,
#     model_name=model_name
# )

with open("metadata.json") as f:
    metadata = json.load(f)


def categorize_query(query):
    """Categorize user queries into types (greetings, small talk, unrelated, etc.)."""
    query = query.lower().strip()

    # Greetings
    greeting_patterns = [
        r"\bhello\b", r"\bhi\b", r"\bhey\b", r"\bhola\b", r"\bgreetings\b",
        r"\bwhat('s| is) up\b", r"\bhowdy\b", r"\bhiya\b", r"\byo\b",
        r"\bgood (morning|afternoon|evening|day|night)\b",
        r"\bhow (are|r) you\b", r"\bhow's it going\b", r"\bhow have you been\b",
        r"\bhope you are (doing )?(well|good|fine)\b",
        r"\bnice to meet you\b", r"\bpleased to meet you\b"
    ]

    # Thank-you messages
    thank_you_patterns = [
        r"\bthank(s| you)\b", r"\bthanks a lot\b", r"\bthanks so much\b",
        r"\bthank you very much\b", r"\bappreciate it\b", r"\bmuch obliged\b",
        r"\bgrateful\b", r"\bcheers\b"
    ]

    # Small talk
    small_talk_patterns = [
        r"\bhow (are|r) you\b", r"\bhow's your day\b", r"\bwhat's up\b",
        r"\bhow's it going\b", r"\bhow have you been\b", r"\bhope you are well\b"
    ]

    # Unrelated topics
    unrelated_patterns = [
        r"\btell me a joke\b", r"\bwho won\b", r"\bwhat is ai\b",
        r"\bexplain blockchain\b"
    ]

    # Goodbye messages
    goodbye_patterns = [
        r"\bbye\b", r"\bgoodbye\b", r"\bsee you\b", r"\bhave a nice day\b"
    ]

    # Rude or inappropriate messages
    rude_patterns = [
        r"\bstupid\b", r"\bdumb\b", r"\buseless\b", r"\bshut up\b"
    ]

    if any(re.search(pattern, query) for pattern in greeting_patterns):
        return "greeting"
    if any(re.search(pattern, query) for pattern in thank_you_patterns):
        return "thank_you"
    if any(re.search(pattern, query) for pattern in small_talk_patterns):
        return "small_talk"
    if any(re.search(pattern, query) for pattern in unrelated_patterns):
        return "unrelated"
    if any(re.search(pattern, query) for pattern in goodbye_patterns):
        return "goodbye"
    if any(re.search(pattern, query) for pattern in rude_patterns):
        return "rude"
    return "normal"


# Search the FAISS index for the most relevant chunks
def search_text(query, top_k=2):
    query_embedding = (
        np.array(model.encode(query, convert_to_numpy=True))
        .astype("float32")
        .reshape(1, -1)
    )
    distances, indices = index.search(query_embedding, top_k)
    results = []
    for idx in indices[0]:
        if idx >= 0:  # FAISS returns -1 when fewer than top_k matches exist
            results.append(metadata[idx])
    return results
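
# ---------------------------------------------------------------------------
# Sketch: how "faiss_index.bin" and "metadata.json" might be built. This is
# an illustrative assumption, not the project's actual indexing script;
# `chunks` is a hypothetical list of {"text": ...} dicts extracted from the
# PDF, and the file names mirror the ones loaded above.
#
#   chunks = [...]  # sections extracted from the Exelsys easyHR PDF
#   embeddings = model.encode(
#       [c["text"] for c in chunks], convert_to_numpy=True
#   ).astype("float32")
#   idx = faiss.IndexFlatL2(embeddings.shape[1])  # all-MiniLM-L6-v2 -> 384 dims
#   idx.add(embeddings)
#   faiss.write_index(idx, "faiss_index.bin")
#   with open("metadata.json", "w") as f:
#       json.dump(chunks, f)  # metadata[i] corresponds to index vector i
# ---------------------------------------------------------------------------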

# Serve the HTML page
@app.route("/")
def home():
    return render_template("index.html")


@app.route("/query", methods=["POST"])
def query_pdf():
    query = request.json.get("query")
    query_type = categorize_query(query)

    if query_type == "greeting":
        return jsonify({"text": "Hello! How can I assist you with Exelsys EasyHR?", "images": []})
    if query_type == "thank_you":
        return jsonify({"text": "You're welcome! How can I assist you further?", "images": []})
    if query_type == "small_talk":
        return jsonify({"text": "I'm here to assist with Exelsys EasyHR. How can I help?", "images": []})
    if query_type == "unrelated":
        return jsonify({"text": "I'm here to assist with Exelsys easyHR queries only.", "images": []})
    # Note: categorize_query never returns "vague" as written, so this branch
    # is currently unreachable; kept in case a vague-query check is added.
    if query_type == "vague":
        return jsonify({"text": "Could you please provide more details?", "images": []})
    if query_type == "goodbye":
        return jsonify({"text": "You're welcome! Have a great day!", "images": []})
    if query_type == "rude":
        return jsonify({"text": "I'm here to assist you professionally.", "images": []})

    # Search for relevant PDF content using FAISS
    results = search_text(query, top_k=3)
    if not results:
        return jsonify({"text": "No relevant results found in the PDF.", "images": []})

    # Merge multiple text results
    retrieved_text = "\n\n---\n\n".join([res["text"] for res in results])
    print(retrieved_text)

    prompt_extract = PromptTemplate.from_template(
        """
        ### YOU ARE AN EXELSYS EASYHR GUIDE ASSISTANT:

        ### INSTRUCTIONS:
        - Your job is to provide step-by-step guidance for the user query below.
        - Check the user query and provide relevant information from the Exelsys easyHR PDF.
        - Only include image paths that are relevant to both your response and the user query.
        - Format the text response in HTML tags as steps, but do not wrap image paths in HTML. Do not use large heading tags.
        - Do not add questions to the response.
        - Base your response **only** on the retrieved context from the PDF.
        - If no relevant information is found, simply respond with: "Not found."
        - If the user greets you (e.g., "Hello", "Hi", "Good morning"), respond politely but keep it brief.
        - If the query is unrelated to Exelsys easyHR, respond with: "I'm here to assist with Exelsys easyHR queries only."

        Each step may have an associated image that should be placed **immediately after** the step description.

        ### **Formatting Rules:**
        1. Each step should be numbered (`1., 2., 3., etc.`).
        2. If a step has an associated image, include the Markdown image syntax **right after** the step description.
        3. Do NOT list all images at the bottom; place them **inline** with their respective steps.
        4. If no image is available for a step, simply move to the next step.

        ### Example Format:
        1. Step 1 Text (description of the step).
        2. Step 2 Text (description of the step).
        3. Step 3 Text (description of the step).

        ### USER QUERY:
        {query}

        ### CONTEXT FROM PDF:
        {retrieved_text}

        ### ANSWER:
        """
    )

    # Chain the prompt with the LLM
    chain_extract = prompt_extract | llm
    chat_response = chain_extract.invoke({"query": query, "retrieved_text": retrieved_text})

    # Convert response to string
    response_text = str(chat_response.content)

    # Final response JSON. "images" is kept for consistency with the early
    # returns above; images are placed inline in the text by the prompt.
    response = {
        "text": response_text,
        "images": []
    }
    print(response)
    return jsonify(response)


if __name__ == "__main__":
    app.run(debug=True)
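
# ---------------------------------------------------------------------------
# Example client (a minimal smoke test, not part of the app). Assumes the
# server is running locally on Flask's default port 5000; the query string
# is a hypothetical example.
#
#   import requests
#   resp = requests.post(
#       "http://127.0.0.1:5000/query",
#       json={"query": "How do I apply for leave?"},
#   )
#   print(resp.json()["text"])
# ---------------------------------------------------------------------------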