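"""Agentic RAG demo: load a PDF, split it into chunks, embed the chunks with
sentence-transformers, index them in FAISS, and answer questions with a
Groq-hosted LLM via a LangChain tool-calling agent that can optionally use
Tavily web search when the retrieved context is insufficient."""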
import os
from langchain_community.document_loaders import PyMuPDFLoader
import faiss
from langchain_groq import ChatGroq
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
from sentence_transformers import SentenceTransformer
import dotenv
dotenv.load_dotenv()
# Initialize LLM and tools globally
def model_selection(model_name):
    llm = ChatGroq(model=model_name, api_key=os.getenv("GROQ_API_KEY"))
    return llm
tools = [TavilySearchResults(max_results=5)]
# Initialize memory for conversation history
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
def estimate_tokens(text):
    """Estimate the number of tokens in a text (rough approximation)."""
    return len(text) // 4
def process_pdf_file(file_path):
    """Load a PDF file and extract its text."""
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist.")
    loader = PyMuPDFLoader(file_path)
    documents = loader.load()
    text = "".join(doc.page_content for doc in documents)
    return text
def chunk_text(text, max_length=1500):
    """Split text into chunks based on paragraphs, respecting max_length."""
    paragraphs = text.split("\n\n")
    chunks = []
    current_chunk = ""
    for paragraph in paragraphs:
        if len(current_chunk) + len(paragraph) <= max_length:
            current_chunk += paragraph + "\n\n"
        else:
            # Skip empty chunks (e.g. when the very first paragraph already exceeds max_length).
            if current_chunk.strip():
                chunks.append(current_chunk.strip())
            current_chunk = paragraph + "\n\n"
    if current_chunk.strip():
        chunks.append(current_chunk.strip())
    return chunks
def create_embeddings(texts, model):
    """Create embeddings for a list of texts using the provided model."""
    embeddings = model.encode(texts, show_progress_bar=True, convert_to_tensor=True)
    return embeddings.cpu().numpy()
def build_faiss_index(embeddings):
    """Build a FAISS index from embeddings for similarity search."""
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return index
def retrieve_similar_chunks(query, index, texts, model, k=3, max_chunk_length=3500):
    """Retrieve the top k chunks most similar to the query from the FAISS index."""
    query_embedding = model.encode([query], convert_to_tensor=True).cpu().numpy()
    distances, indices = index.search(query_embedding, k)
    return [(texts[i][:max_chunk_length], distances[0][j]) for j, i in enumerate(indices[0])]
def agentic_rag(llm, tools, query, context, Use_Tavily=False):
    """Answer a query with a tool-calling agent, falling back to a direct LLM call on error."""
    # Define the prompt template for the agent
    search_instructions = (
        "Use the search tool if the context is insufficient to answer the question or you are unsure. "
        "Give source links if you use the search tool."
        if Use_Tavily
        else "Use the context provided to answer the question."
    )
    prompt = ChatPromptTemplate.from_messages([
        ("system", """
        You are a helpful assistant. {search_instructions}
        Instructions:
        1. Use the provided context to answer the user's question.
        2. Provide a clear answer; if you don't know the answer, say 'I don't know'.
        """),
        # Place the conversation history before the current turn so the model reads the dialogue in order.
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "Context: {context}\n\nQuestion: {input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ])
    # Only use tools when Tavily is enabled
    agent_tools = tools if Use_Tavily else []
    try:
        # Create the agent and executor with the appropriate tools
        agent = create_tool_calling_agent(llm, agent_tools, prompt)
        agent_executor = AgentExecutor(agent=agent, tools=agent_tools, memory=memory, verbose=True)
        # Execute the agent
        return agent_executor.invoke({
            "input": query,
            "context": context,
            "search_instructions": search_instructions
        })
    except Exception as e:
        print(f"Error during agent execution: {str(e)}")
        # Fallback to a direct LLM call without the agent framework
        fallback_prompt = ChatPromptTemplate.from_messages([
            ("system", "You are a helpful assistant. Use the provided context to answer the user's question."),
            ("human", "Context: {context}\n\nQuestion: {input}")
        ])
        response = llm.invoke(fallback_prompt.format_messages(context=context, input=query))
        return {"output": response.content}
if __name__ == "__main__":
    # Process PDF and prepare index
    dotenv.load_dotenv()
    pdf_file = "JatinCV.pdf"
    llm = model_selection("meta-llama/llama-4-scout-17b-16e-instruct")
    texts = process_pdf_file(pdf_file)
    chunks = chunk_text(texts, max_length=1500)
    model = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = create_embeddings(chunks, model)
    index = build_faiss_index(embeddings)
    # Chat loop
    print("Chat with the assistant (type 'exit' or 'quit' to stop):")
    while True:
        query = input("User: ")
        if query.lower() in ["exit", "quit"]:
            break
        # Retrieve similar chunks
        similar_chunks = retrieve_similar_chunks(query, index, chunks, model, k=3)
        context = "\n".join([chunk for chunk, _ in similar_chunks])
        # Generate response
        response = agentic_rag(llm, tools, query=query, context=context, Use_Tavily=True)
        print("Assistant:", response["output"])