Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,16 +7,23 @@ from langchain.embeddings import HuggingFaceEmbeddings
|
|
7 |
from langchain.vectorstores import Chroma
|
8 |
import os
|
9 |
|
10 |
-
# Initialize session state for storing the vector database
|
11 |
if 'vectordb' not in st.session_state:
|
12 |
-
st.session_state.vectordb =
|
13 |
if 'model' not in st.session_state:
|
14 |
st.session_state.model = None
|
15 |
if 'tokenizer' not in st.session_state:
|
16 |
st.session_state.tokenizer = None
|
|
|
|
|
17 |
|
18 |
st.title("PDF Question Answering System")
|
19 |
|
|
|
|
|
|
|
|
|
|
|
20 |
# File uploader for PDFs
|
21 |
def load_pdfs():
|
22 |
uploaded_files = st.file_uploader("Upload your PDF files", type=['pdf'], accept_multiple_files=True)
|
@@ -47,10 +54,14 @@ def load_pdfs():
|
|
47 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
48 |
splits = text_splitter.split_documents(documents)
|
49 |
|
50 |
-
# Create embeddings and vector store
|
51 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
52 |
-
st.session_state.
|
53 |
-
|
|
|
|
|
|
|
|
|
54 |
st.success("PDFs processed successfully!")
|
55 |
return True
|
56 |
return False
|
@@ -84,17 +95,12 @@ def combine_documents_and_answer(retrieved_docs, question, model, tokenizer):
|
|
84 |
context = "\n".join(doc.page_content for doc in retrieved_docs)
|
85 |
prompt = f"""You are an assistant tasked with answering questions based SOLELY on the provided context.
|
86 |
Do not use any external knowledge or information not present in the given context.
|
87 |
-
If the question is of any other field and irrelevant to the context provided,
|
88 |
-
|
89 |
-
|
90 |
-
Your answer should elaborate every tiny detail mentioned in the context.
|
91 |
-
So, answer the following question within the context in detail:
|
92 |
-
|
93 |
Question: {question}
|
94 |
-
|
95 |
Context:
|
96 |
{context}
|
97 |
-
|
98 |
Answer:"""
|
99 |
return generate_response(prompt, model, tokenizer)
|
100 |
|
@@ -107,10 +113,10 @@ def main():
|
|
107 |
|
108 |
# Model path input
|
109 |
model_path = st.sidebar.text_input("Enter the path to your model:",
|
110 |
-
|
111 |
|
112 |
# Load PDFs first
|
113 |
-
if st.session_state.
|
114 |
pdfs_processed = load_pdfs()
|
115 |
if not pdfs_processed:
|
116 |
st.info("Please upload PDF files and click 'Process PDFs' to continue.")
|
@@ -127,7 +133,7 @@ def main():
|
|
127 |
return
|
128 |
|
129 |
# Question answering interface
|
130 |
-
if st.session_state.
|
131 |
question = st.text_area("Enter your question:", height=100)
|
132 |
|
133 |
if st.button("Get Answer"):
|
@@ -135,7 +141,7 @@ def main():
|
|
135 |
with st.spinner("Generating answer..."):
|
136 |
try:
|
137 |
# Get relevant documents
|
138 |
-
retriever = st.session_state.vectordb.as_retriever(search_kwargs={"k": 4})
|
139 |
retrieved_docs = retriever.get_relevant_documents(question)
|
140 |
|
141 |
# Generate answer
|
@@ -162,4 +168,4 @@ def main():
|
|
162 |
st.warning("Please enter a question.")
|
163 |
|
164 |
if __name__ == "__main__":
|
165 |
-
main()
|
|
|
7 |
from langchain.vectorstores import Chroma
|
8 |
import os
|
9 |
|
10 |
+
# Initialize session state for storing the vector database and tenant
|
11 |
if 'vectordb' not in st.session_state:
|
12 |
+
st.session_state.vectordb = {}
|
13 |
if 'model' not in st.session_state:
|
14 |
st.session_state.model = None
|
15 |
if 'tokenizer' not in st.session_state:
|
16 |
st.session_state.tokenizer = None
|
17 |
+
if 'tenant' not in st.session_state:
|
18 |
+
st.session_state.tenant = "default_tenant" # Default tenant
|
19 |
|
20 |
st.title("PDF Question Answering System")
|
21 |
|
22 |
+
# Tenant selection
|
23 |
+
st.sidebar.title("Settings")
|
24 |
+
tenant = st.sidebar.text_input("Enter your tenant:", value=st.session_state.tenant)
|
25 |
+
st.session_state.tenant = tenant # Update the tenant in session state
|
26 |
+
|
27 |
# File uploader for PDFs
|
28 |
def load_pdfs():
|
29 |
uploaded_files = st.file_uploader("Upload your PDF files", type=['pdf'], accept_multiple_files=True)
|
|
|
54 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
55 |
splits = text_splitter.split_documents(documents)
|
56 |
|
57 |
+
# Create embeddings and vector store for the current tenant
|
58 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
59 |
+
if st.session_state.tenant not in st.session_state.vectordb:
|
60 |
+
st.session_state.vectordb[st.session_state.tenant] = Chroma.from_documents(documents=splits, embedding=embeddings)
|
61 |
+
else:
|
62 |
+
# Update the existing vector store for the tenant
|
63 |
+
st.session_state.vectordb[st.session_state.tenant].add_documents(splits)
|
64 |
+
|
65 |
st.success("PDFs processed successfully!")
|
66 |
return True
|
67 |
return False
|
|
|
95 |
context = "\n".join(doc.page_content for doc in retrieved_docs)
|
96 |
prompt = f"""You are an assistant tasked with answering questions based SOLELY on the provided context.
|
97 |
Do not use any external knowledge or information not present in the given context.
|
98 |
+
If the question is of any other field and irrelevant to the context provided, respond just with "I can't tell you this, ask something from the provided context."
|
99 |
+
DO NOT INCLUDE YOUR OWN OPINION. IMPORTANT: Your answer should be well structured and meaningful.
|
100 |
+
Your answer should elaborate every tiny detail mentioned in the context. So, answer the following question within the context in detail:
|
|
|
|
|
|
|
101 |
Question: {question}
|
|
|
102 |
Context:
|
103 |
{context}
|
|
|
104 |
Answer:"""
|
105 |
return generate_response(prompt, model, tokenizer)
|
106 |
|
|
|
113 |
|
114 |
# Model path input
|
115 |
model_path = st.sidebar.text_input("Enter the path to your model:",
|
116 |
+
placeholder="waqasali1707/llama_3.2_3B_4_bit_Quan")
|
117 |
|
118 |
# Load PDFs first
|
119 |
+
if st.session_state.tenant not in st.session_state.vectordb:
|
120 |
pdfs_processed = load_pdfs()
|
121 |
if not pdfs_processed:
|
122 |
st.info("Please upload PDF files and click 'Process PDFs' to continue.")
|
|
|
133 |
return
|
134 |
|
135 |
# Question answering interface
|
136 |
+
if st.session_state.tenant in st.session_state.vectordb and st.session_state.model is not None:
|
137 |
question = st.text_area("Enter your question:", height=100)
|
138 |
|
139 |
if st.button("Get Answer"):
|
|
|
141 |
with st.spinner("Generating answer..."):
|
142 |
try:
|
143 |
# Get relevant documents
|
144 |
+
retriever = st.session_state.vectordb[st.session_state.tenant].as_retriever(search_kwargs={"k": 4})
|
145 |
retrieved_docs = retriever.get_relevant_documents(question)
|
146 |
|
147 |
# Generate answer
|
|
|
168 |
st.warning("Please enter a question.")
|
169 |
|
170 |
if __name__ == "__main__":
|
171 |
+
main()
|