subashdvorak commited on
Commit
4aff460
·
verified ·
1 Parent(s): 1705a30

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import fitz # PyMuPDF
3
+ import faiss
4
+ from sentence_transformers import SentenceTransformer
5
+ import numpy as np
6
+ from phi.agent import Agent
7
+ from phi.model.groq import Groq
8
+
9
# Sentence-embedding model, loaded once at import time and shared by both
# document indexing (process_pdf) and query encoding (chat).
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
11
+
12
def agent_response(question, retrieved_text):
    """Answer *question* with a Groq-backed phi Agent, grounded in *retrieved_text*.

    The retrieved document text is injected into the agent's instructions and
    also appended to the run prompt; returns the agent's answer as a string.
    """
    task_instructions = [
        f"First read the question carefully. The question is: **{question}**",
        f"Then read the document provided to you as a text. The document is: \n**{retrieved_text}**\n",
        "Finally answer the question based on the provided document only. Don't try to give random responses.",
    ]
    qa_agent = Agent(
        model=Groq(id="llama-3.3-70b-versatile"),
        markdown=True,
        description="You are an AI assistant that provides the answer based on the provided document.",
        instructions=task_instructions,
    )
    prompt = question + '\n' + retrieved_text
    return qa_agent.run(prompt).content
25
+
26
class PDFChatbot:
    """Retrieval chatbot over a single PDF.

    Each PDF page becomes one text chunk; chunks are embedded with the
    module-level `embedding_model` and indexed in a FAISS L2 index.
    """

    def __init__(self):
        self.text_chunks = []  # one extracted-text string per PDF page
        self.index = None  # faiss.IndexFlatL2 over chunk embeddings; None until a PDF is processed

    def process_pdf(self, pdf_file):
        """Extract text from a PDF and build the FAISS index.

        Accepts either a filesystem path or a binary file-like object
        (e.g. a Streamlit UploadedFile). Returns a status message string.
        """
        self.text_chunks = []
        # BUG FIX: fitz.open()'s positional argument must be a path. A
        # Streamlit upload is a file-like object and must be passed as a
        # byte stream via the `stream=` keyword, or opening fails.
        if hasattr(pdf_file, "read"):
            doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
        else:
            doc = fitz.open(pdf_file)
        try:
            for page in doc:
                self.text_chunks.append(page.get_text("text"))
        finally:
            doc.close()  # release the document handle even if extraction fails

        # Guard: encoding an empty list would produce a 1-D array and crash
        # on embeddings.shape[1] below.
        if not self.text_chunks:
            return "No text could be extracted from this PDF."

        # Embed the chunks (NumPy array: n_pages x embedding_dim)
        embeddings = embedding_model.encode(self.text_chunks, convert_to_numpy=True)

        # Create FAISS index over the embedding dimension (L2 distance)
        self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(embeddings)
        return "PDF processed successfully!"

    def chat(self, query):
        """Retrieve the most relevant chunk for *query* and answer via the agent."""
        if self.index is None:
            return "Please upload a PDF first."

        query_embedding = embedding_model.encode([query], convert_to_numpy=True)
        _, indices = self.index.search(query_embedding, 1)  # Get top match
        retrieved_text = self.text_chunks[indices[0][0]]
        response = agent_response(query, retrieved_text)
        return response
56
+
57
# --- Streamlit UI -----------------------------------------------------------
# Streamlit re-runs this whole script on every widget interaction. Keeping the
# chatbot in session_state (instead of rebuilding it at module level) avoids
# re-extracting and re-embedding the PDF on every rerun.
if "chatbot" not in st.session_state:
    st.session_state.chatbot = PDFChatbot()
chatbot = st.session_state.chatbot

st.title("Chat with your PDF")

uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
if uploaded_file:
    # Only (re)process when a different file is uploaded, not on each rerun.
    if st.session_state.get("processed_file") != uploaded_file.name:
        st.session_state.process_message = chatbot.process_pdf(uploaded_file)
        st.session_state.processed_file = uploaded_file.name
    st.success(st.session_state.process_message)

query = st.text_input("Ask a question")
if st.button("Ask"):
    if query:
        response = chatbot.chat(query)
        st.text_area("Answer", response, height=200)