waqasali1707 committed on
Commit
7a3fac5
·
verified ·
1 Parent(s): b2da4a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -18
app.py CHANGED
@@ -7,16 +7,23 @@ from langchain.embeddings import HuggingFaceEmbeddings
7
  from langchain.vectorstores import Chroma
8
  import os
9
 
10
- # Initialize session state for storing the vector database
11
  if 'vectordb' not in st.session_state:
12
- st.session_state.vectordb = None
13
  if 'model' not in st.session_state:
14
  st.session_state.model = None
15
  if 'tokenizer' not in st.session_state:
16
  st.session_state.tokenizer = None
 
 
17
 
18
  st.title("PDF Question Answering System")
19
 
 
 
 
 
 
20
  # File uploader for PDFs
21
  def load_pdfs():
22
  uploaded_files = st.file_uploader("Upload your PDF files", type=['pdf'], accept_multiple_files=True)
@@ -47,10 +54,14 @@ def load_pdfs():
47
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
48
  splits = text_splitter.split_documents(documents)
49
 
50
- # Create embeddings and vector store
51
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
52
- st.session_state.vectordb = Chroma.from_documents(documents=splits, embedding=embeddings)
53
-
 
 
 
 
54
  st.success("PDFs processed successfully!")
55
  return True
56
  return False
@@ -84,17 +95,12 @@ def combine_documents_and_answer(retrieved_docs, question, model, tokenizer):
84
  context = "\n".join(doc.page_content for doc in retrieved_docs)
85
  prompt = f"""You are an assistant tasked with answering questions based SOLELY on the provided context.
86
  Do not use any external knowledge or information not present in the given context.
87
- If the question is of any other field and irrelevant to the context provided, repond just with "I can't tell you this, ask something from the provided context." DO NOT INCLUDE YOUR OWN OPINION.
88
-
89
- IMPORTANT: Your answer should be well structured and meaningful. It should stop generating when it is done. Do not generate or repeat absurd sentences.
90
- Your answer should elaborate every tiny detail mentioned in the context.
91
- So, answer the following question within the context in detail:
92
-
93
  Question: {question}
94
-
95
  Context:
96
  {context}
97
-
98
  Answer:"""
99
  return generate_response(prompt, model, tokenizer)
100
 
@@ -107,10 +113,10 @@ def main():
107
 
108
  # Model path input
109
  model_path = st.sidebar.text_input("Enter the path to your model:",
110
- placeholder="waqasali1707/llama_3.2_3B_4_bit_Quan")
111
 
112
  # Load PDFs first
113
- if st.session_state.vectordb is None:
114
  pdfs_processed = load_pdfs()
115
  if not pdfs_processed:
116
  st.info("Please upload PDF files and click 'Process PDFs' to continue.")
@@ -127,7 +133,7 @@ def main():
127
  return
128
 
129
  # Question answering interface
130
- if st.session_state.vectordb is not None and st.session_state.model is not None:
131
  question = st.text_area("Enter your question:", height=100)
132
 
133
  if st.button("Get Answer"):
@@ -135,7 +141,7 @@ def main():
135
  with st.spinner("Generating answer..."):
136
  try:
137
  # Get relevant documents
138
- retriever = st.session_state.vectordb.as_retriever(search_kwargs={"k": 4})
139
  retrieved_docs = retriever.get_relevant_documents(question)
140
 
141
  # Generate answer
@@ -162,4 +168,4 @@ def main():
162
  st.warning("Please enter a question.")
163
 
164
  if __name__ == "__main__":
165
- main()
 
7
  from langchain.vectorstores import Chroma
8
  import os
9
 
10
+ # Initialize session state for storing the vector database and tenant
11
  if 'vectordb' not in st.session_state:
12
+ st.session_state.vectordb = {}
13
  if 'model' not in st.session_state:
14
  st.session_state.model = None
15
  if 'tokenizer' not in st.session_state:
16
  st.session_state.tokenizer = None
17
+ if 'tenant' not in st.session_state:
18
+ st.session_state.tenant = "default_tenant" # Default tenant
19
 
20
  st.title("PDF Question Answering System")
21
 
22
+ # Tenant selection
23
+ st.sidebar.title("Settings")
24
+ tenant = st.sidebar.text_input("Enter your tenant:", value=st.session_state.tenant)
25
+ st.session_state.tenant = tenant # Update the tenant in session state
26
+
27
  # File uploader for PDFs
28
  def load_pdfs():
29
  uploaded_files = st.file_uploader("Upload your PDF files", type=['pdf'], accept_multiple_files=True)
 
54
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
55
  splits = text_splitter.split_documents(documents)
56
 
57
+ # Create embeddings and vector store for the current tenant
58
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
59
+ if st.session_state.tenant not in st.session_state.vectordb:
60
+ st.session_state.vectordb[st.session_state.tenant] = Chroma.from_documents(documents=splits, embedding=embeddings)
61
+ else:
62
+ # Update the existing vector store for the tenant
63
+ st.session_state.vectordb[st.session_state.tenant].add_documents(splits)
64
+
65
  st.success("PDFs processed successfully!")
66
  return True
67
  return False
 
95
  context = "\n".join(doc.page_content for doc in retrieved_docs)
96
  prompt = f"""You are an assistant tasked with answering questions based SOLELY on the provided context.
97
  Do not use any external knowledge or information not present in the given context.
98
+ If the question is of any other field and irrelevant to the context provided, respond just with "I can't tell you this, ask something from the provided context."
99
+ DO NOT INCLUDE YOUR OWN OPINION. IMPORTANT: Your answer should be well structured and meaningful.
100
+ Your answer should elaborate every tiny detail mentioned in the context. So, answer the following question within the context in detail:
 
 
 
101
  Question: {question}
 
102
  Context:
103
  {context}
 
104
  Answer:"""
105
  return generate_response(prompt, model, tokenizer)
106
 
 
113
 
114
  # Model path input
115
  model_path = st.sidebar.text_input("Enter the path to your model:",
116
+ placeholder="waqasali1707/llama_3.2_3B_4_bit_Quan")
117
 
118
  # Load PDFs first
119
+ if st.session_state.tenant not in st.session_state.vectordb:
120
  pdfs_processed = load_pdfs()
121
  if not pdfs_processed:
122
  st.info("Please upload PDF files and click 'Process PDFs' to continue.")
 
133
  return
134
 
135
  # Question answering interface
136
+ if st.session_state.tenant in st.session_state.vectordb and st.session_state.model is not None:
137
  question = st.text_area("Enter your question:", height=100)
138
 
139
  if st.button("Get Answer"):
 
141
  with st.spinner("Generating answer..."):
142
  try:
143
  # Get relevant documents
144
+ retriever = st.session_state.vectordb[st.session_state.tenant].as_retriever(search_kwargs={"k": 4})
145
  retrieved_docs = retriever.get_relevant_documents(question)
146
 
147
  # Generate answer
 
168
  st.warning("Please enter a question.")
169
 
170
  if __name__ == "__main__":
171
+ main()