waqasali1707 committed on
Commit
7a3fac5
·
verified ·
1 Parent(s): b2da4a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -18
app.py CHANGED
@@ -7,16 +7,23 @@ from langchain.embeddings import HuggingFaceEmbeddings
7
  from langchain.vectorstores import Chroma
8
  import os
9
 
10
- # Initialize session state for storing the vector database
11
  if 'vectordb' not in st.session_state:
12
- st.session_state.vectordb = None
13
  if 'model' not in st.session_state:
14
  st.session_state.model = None
15
  if 'tokenizer' not in st.session_state:
16
  st.session_state.tokenizer = None
 
 
17
 
18
  st.title("PDF Question Answering System")
19
 
 
 
 
 
 
20
  # File uploader for PDFs
21
  def load_pdfs():
22
  uploaded_files = st.file_uploader("Upload your PDF files", type=['pdf'], accept_multiple_files=True)
@@ -47,10 +54,14 @@ def load_pdfs():
47
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
48
  splits = text_splitter.split_documents(documents)
49
 
50
- # Create embeddings and vector store
51
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
52
- st.session_state.vectordb = Chroma.from_documents(documents=splits, embedding=embeddings)
53
-
 
 
 
 
54
  st.success("PDFs processed successfully!")
55
  return True
56
  return False
@@ -84,17 +95,12 @@ def combine_documents_and_answer(retrieved_docs, question, model, tokenizer):
84
  context = "\n".join(doc.page_content for doc in retrieved_docs)
85
  prompt = f"""You are an assistant tasked with answering questions based SOLELY on the provided context.
86
  Do not use any external knowledge or information not present in the given context.
87
- If the question is of any other field and irrelevant to the context provided, repond just with "I can't tell you this, ask something from the provided context." DO NOT INCLUDE YOUR OWN OPINION.
88
-
89
- IMPORTANT: Your answer should be well structured and meaningful. It should stop generating when it is done. Do not generate or repeat absurd sentences.
90
- Your answer should elaborate every tiny detail mentioned in the context.
91
- So, answer the following question within the context in detail:
92
-
93
  Question: {question}
94
-
95
  Context:
96
  {context}
97
-
98
  Answer:"""
99
  return generate_response(prompt, model, tokenizer)
100
 
@@ -107,10 +113,10 @@ def main():
107
 
108
  # Model path input
109
  model_path = st.sidebar.text_input("Enter the path to your model:",
110
- placeholder="waqasali1707/llama_3.2_3B_4_bit_Quan")
111
 
112
  # Load PDFs first
113
- if st.session_state.vectordb is None:
114
  pdfs_processed = load_pdfs()
115
  if not pdfs_processed:
116
  st.info("Please upload PDF files and click 'Process PDFs' to continue.")
@@ -127,7 +133,7 @@ def main():
127
  return
128
 
129
  # Question answering interface
130
- if st.session_state.vectordb is not None and st.session_state.model is not None:
131
  question = st.text_area("Enter your question:", height=100)
132
 
133
  if st.button("Get Answer"):
@@ -135,7 +141,7 @@ def main():
135
  with st.spinner("Generating answer..."):
136
  try:
137
  # Get relevant documents
138
- retriever = st.session_state.vectordb.as_retriever(search_kwargs={"k": 4})
139
  retrieved_docs = retriever.get_relevant_documents(question)
140
 
141
  # Generate answer
@@ -162,4 +168,4 @@ def main():
162
  st.warning("Please enter a question.")
163
 
164
  if __name__ == "__main__":
165
- main()
 
7
  from langchain.vectorstores import Chroma
8
  import os
9
 
10
+ # Initialize session state for storing the vector database and tenant
11
  if 'vectordb' not in st.session_state:
12
+ st.session_state.vectordb = {}
13
  if 'model' not in st.session_state:
14
  st.session_state.model = None
15
  if 'tokenizer' not in st.session_state:
16
  st.session_state.tokenizer = None
17
+ if 'tenant' not in st.session_state:
18
+ st.session_state.tenant = "default_tenant" # Default tenant
19
 
20
  st.title("PDF Question Answering System")
21
 
22
+ # Tenant selection
23
+ st.sidebar.title("Settings")
24
+ tenant = st.sidebar.text_input("Enter your tenant:", value=st.session_state.tenant)
25
+ st.session_state.tenant = tenant # Update the tenant in session state
26
+
27
  # File uploader for PDFs
28
  def load_pdfs():
29
  uploaded_files = st.file_uploader("Upload your PDF files", type=['pdf'], accept_multiple_files=True)
 
54
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
55
  splits = text_splitter.split_documents(documents)
56
 
57
+ # Create embeddings and vector store for the current tenant
58
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
59
+ if st.session_state.tenant not in st.session_state.vectordb:
60
+ st.session_state.vectordb[st.session_state.tenant] = Chroma.from_documents(documents=splits, embedding=embeddings)
61
+ else:
62
+ # Update the existing vector store for the tenant
63
+ st.session_state.vectordb[st.session_state.tenant].add_documents(splits)
64
+
65
  st.success("PDFs processed successfully!")
66
  return True
67
  return False
 
95
  context = "\n".join(doc.page_content for doc in retrieved_docs)
96
  prompt = f"""You are an assistant tasked with answering questions based SOLELY on the provided context.
97
  Do not use any external knowledge or information not present in the given context.
98
+ If the question is of any other field and irrelevant to the context provided, respond just with "I can't tell you this, ask something from the provided context."
99
+ DO NOT INCLUDE YOUR OWN OPINION. IMPORTANT: Your answer should be well structured and meaningful.
100
+ Your answer should elaborate every tiny detail mentioned in the context. So, answer the following question within the context in detail:
 
 
 
101
  Question: {question}
 
102
  Context:
103
  {context}
 
104
  Answer:"""
105
  return generate_response(prompt, model, tokenizer)
106
 
 
113
 
114
  # Model path input
115
  model_path = st.sidebar.text_input("Enter the path to your model:",
116
+ placeholder="waqasali1707/llama_3.2_3B_4_bit_Quan")
117
 
118
  # Load PDFs first
119
+ if st.session_state.tenant not in st.session_state.vectordb:
120
  pdfs_processed = load_pdfs()
121
  if not pdfs_processed:
122
  st.info("Please upload PDF files and click 'Process PDFs' to continue.")
 
133
  return
134
 
135
  # Question answering interface
136
+ if st.session_state.tenant in st.session_state.vectordb and st.session_state.model is not None:
137
  question = st.text_area("Enter your question:", height=100)
138
 
139
  if st.button("Get Answer"):
 
141
  with st.spinner("Generating answer..."):
142
  try:
143
  # Get relevant documents
144
+ retriever = st.session_state.vectordb[st.session_state.tenant].as_retriever(search_kwargs={"k": 4})
145
  retrieved_docs = retriever.get_relevant_documents(question)
146
 
147
  # Generate answer
 
168
  st.warning("Please enter a question.")
169
 
170
  if __name__ == "__main__":
171
+ main()