DrishtiSharma committed on
Commit
eb428fa
·
verified ·
1 Parent(s): e0de377

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -13
app.py CHANGED
@@ -9,7 +9,7 @@ from langchain.document_loaders import PDFPlumberLoader
9
  from langchain_experimental.text_splitter import SemanticChunker
10
  from langchain_huggingface import HuggingFaceEmbeddings
11
  from langchain_chroma import Chroma
12
- from prompts import rag_prompt, relevancy_prompt, relevant_context_picker_prompt, response_synth
13
 
14
  # Set API Keys
15
  os.environ["GROQ_API_KEY"] = st.secrets.get("GROQ_API_KEY", "")
@@ -57,7 +57,7 @@ if pdf_source == "Upload a PDF file":
57
 
58
  elif pdf_source == "Enter a PDF URL":
59
  pdf_url = st.text_input("Enter PDF URL:", value="https://arxiv.org/pdf/2406.06998")
60
- if pdf_url and st.session_state.pdf_path is None:
61
  with st.spinner("Downloading PDF..."):
62
  try:
63
  response = requests.get(pdf_url)
@@ -75,28 +75,31 @@ elif pdf_source == "Enter a PDF URL":
75
  st.error(f"Error downloading PDF: {e}")
76
 
77
  # Step 2: Process PDF
78
- if st.session_state.pdf_path and not st.session_state.pdf_loaded:
79
  with st.spinner("Loading and processing PDF..."):
80
  loader = PDFPlumberLoader(st.session_state.pdf_path)
81
  docs = loader.load()
82
  st.session_state.documents = docs
83
- st.session_state.pdf_loaded = True
84
  st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
85
 
86
  # Step 3: Chunking
87
- if st.session_state.pdf_loaded and not st.session_state.chunked and st.session_state.documents:
88
  with st.spinner("Chunking the document..."):
89
  model_name = "nomic-ai/modernbert-embed-base"
90
  embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': False})
91
  text_splitter = SemanticChunker(embedding_model)
92
  documents = text_splitter.split_documents(st.session_state.documents)
93
- st.session_state.documents = documents # Store chunked docs
94
- st.session_state.chunked = True
95
  st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
96
 
97
  # Step 4: Setup Vectorstore
98
- if st.session_state.chunked and not st.session_state.vector_created:
99
  with st.spinner("Creating vector store..."):
 
 
 
100
  vector_store = Chroma(
101
  collection_name="deepseek_collection",
102
  collection_metadata={"hnsw:space": "cosine"},
@@ -106,13 +109,13 @@ if st.session_state.chunked and not st.session_state.vector_created:
106
  vector_store.add_documents(st.session_state.documents)
107
  num_documents = len(vector_store.get()["documents"])
108
  st.session_state.vector_store = vector_store
109
- st.session_state.vector_created = True
110
  st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
111
 
112
- # Step 5: Query Input
113
- if st.session_state.vector_created and st.session_state.vector_store:
114
  query = st.text_input("🔍 Enter a Query:")
115
-
116
  if query:
117
  with st.spinner("Retrieving relevant contexts..."):
118
  retriever = st.session_state.vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
@@ -130,4 +133,4 @@ if st.session_state.vector_created and st.session_state.vector_store:
130
  final_response = response_chain.invoke({"query": query, "context": context_texts})
131
 
132
  st.subheader("🟥 RAG Final Response")
133
- st.success(final_response['final_response'])
 
9
  from langchain_experimental.text_splitter import SemanticChunker
10
  from langchain_huggingface import HuggingFaceEmbeddings
11
  from langchain_chroma import Chroma
12
+ from prompts import rag_prompt
13
 
14
  # Set API Keys
15
  os.environ["GROQ_API_KEY"] = st.secrets.get("GROQ_API_KEY", "")
 
57
 
58
  elif pdf_source == "Enter a PDF URL":
59
  pdf_url = st.text_input("Enter PDF URL:", value="https://arxiv.org/pdf/2406.06998")
60
+ if pdf_url and not st.session_state.get("pdf_loaded", False):
61
  with st.spinner("Downloading PDF..."):
62
  try:
63
  response = requests.get(pdf_url)
 
75
  st.error(f"Error downloading PDF: {e}")
76
 
77
  # Step 2: Process PDF
78
+ if st.session_state.pdf_path and not st.session_state.get("pdf_loaded", False):
79
  with st.spinner("Loading and processing PDF..."):
80
  loader = PDFPlumberLoader(st.session_state.pdf_path)
81
  docs = loader.load()
82
  st.session_state.documents = docs
83
+ st.session_state.pdf_loaded = True # ✅ Prevent re-loading
84
  st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
85
 
86
  # Step 3: Chunking
87
+ if st.session_state.get("pdf_loaded", False) and not st.session_state.get("chunked", False):
88
  with st.spinner("Chunking the document..."):
89
  model_name = "nomic-ai/modernbert-embed-base"
90
  embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': False})
91
  text_splitter = SemanticChunker(embedding_model)
92
  documents = text_splitter.split_documents(st.session_state.documents)
93
+ st.session_state.documents = documents # ✅ Store chunked docs
94
+ st.session_state.chunked = True # ✅ Prevent re-chunking
95
  st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
96
 
97
  # Step 4: Setup Vectorstore
98
+ if st.session_state.get("chunked", False) and not st.session_state.get("vector_created", False):
99
  with st.spinner("Creating vector store..."):
100
+ model_name = "nomic-ai/modernbert-embed-base"
101
+ embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': False})
102
+
103
  vector_store = Chroma(
104
  collection_name="deepseek_collection",
105
  collection_metadata={"hnsw:space": "cosine"},
 
109
  vector_store.add_documents(st.session_state.documents)
110
  num_documents = len(vector_store.get()["documents"])
111
  st.session_state.vector_store = vector_store
112
+ st.session_state.vector_created = True # ✅ Prevent re-creating vector store
113
  st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
114
 
115
+ # Step 5: Query Input (this should not trigger previous steps!)
116
+ if st.session_state.get("vector_created", False) and st.session_state.get("vector_store", None):
117
  query = st.text_input("🔍 Enter a Query:")
118
+
119
  if query:
120
  with st.spinner("Retrieving relevant contexts..."):
121
  retriever = st.session_state.vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
 
133
  final_response = response_chain.invoke({"query": query, "context": context_texts})
134
 
135
  st.subheader("🟥 RAG Final Response")
136
+ st.success(final_response['final_response'])