DrishtiSharma committed on
Commit af1d856 · verified · 1 Parent(s): 51225e7

Update app.py

Files changed (1): app.py +52 -98
app.py CHANGED
@@ -56,138 +56,92 @@ else:
     pdf_path = None
 
     # Step 2: Process PDF
-    with st.spinner("Loading PDF..."):
-        loader = PDFPlumberLoader(pdf_path)
-        docs = loader.load()
-
-    st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
-
-    # Step 3: Chunking
-    with st.spinner("Chunking the document..."):
-        model_name = "nomic-ai/modernbert-embed-base"
-        embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})
-        text_splitter = SemanticChunker(embedding_model)
-        documents = text_splitter.split_documents(docs)
-
-    st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
-
-    # Step 4: Setup Vectorstore
-    with st.spinner("Creating vector store..."):
-        vector_store = Chroma(
-            collection_name="deepseek_collection",
-            collection_metadata={"hnsw:space": "cosine"},
-            embedding_function=embedding_model
-        )
-        vector_store.add_documents(documents)
-        num_documents = len(vector_store.get()["documents"])
-
-    st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
-
-    # Step 5: Query Input
-    query = st.text_input("🔍 Enter a Query:")
-    if query:
-        with st.spinner("Retrieving relevant contexts..."):
-            retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
-            contexts = retriever.invoke(query)
-            context_texts = [doc.page_content for doc in contexts]
-
-        st.success(f"✅ **Retrieved {len(context_texts)} Contexts!**")
-        for i, text in enumerate(context_texts, 1):
-            st.write(f"**Context {i}:** {text[:500]}...")
-
-    -----------------
-
+    if pdf_path:
+        with st.spinner("Loading PDF..."):
+            loader = PDFPlumberLoader(pdf_path)
+            docs = loader.load()
+
+        st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
+
+        # Step 3: Chunking
+        with st.spinner("Chunking the document..."):
+            model_name = "nomic-ai/modernbert-embed-base"
+            embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})
+            text_splitter = SemanticChunker(embedding_model)
+            documents = text_splitter.split_documents(docs)
+
+        st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
+
+        # Step 4: Setup Vectorstore
+        with st.spinner("Creating vector store..."):
+            vector_store = Chroma(
+                collection_name="deepseek_collection",
+                collection_metadata={"hnsw:space": "cosine"},
+                embedding_function=embedding_model
+            )
+            vector_store.add_documents(documents)
+            num_documents = len(vector_store.get()["documents"])
+
+        st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
+
+        # Step 5: Query Input
+        query = st.text_input("🔍 Enter a Query:")
+        if query:
+            with st.spinner("Retrieving relevant contexts..."):
+                retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
+                contexts = retriever.invoke(query)
+                context_texts = [doc.page_content for doc in contexts]
+
+            st.success(f"✅ **Retrieved {len(context_texts)} Contexts!**")
+            for i, text in enumerate(context_texts, 1):
+                st.write(f"**Context {i}:** {text[:500]}...")
+
         # Step 6: Context Relevancy Checker
         with st.spinner("Evaluating context relevancy..."):
-            relevancy_prompt = PromptTemplate(
-                input_variables=["retriever_query", "context"],
-                template="""You are an expert judge. Assign relevancy scores (0 or 1) for each context to answer the query.
-
-CONTEXT LIST:
-{context}
-
-QUERY:
-{retriever_query}
-
-RESPONSE (JSON):
-[{{"content": 1, "score": <0 or 1>, "reasoning": "<explanation>"}},
- {{"content": 2, "score": <0 or 1>, "reasoning": "<explanation>"}},
- ...]"""
+            context_relevancy_checker_prompt = PromptTemplate(
+                input_variables=["retriever_query", "context"], template=relevancy_prompt
             )
-            context_relevancy_chain = LLMChain(llm=llm_judge, prompt=relevancy_prompt, output_key="relevancy_response")
+            context_relevancy_chain = LLMChain(llm=llm_judge, prompt=context_relevancy_checker_prompt, output_key="relevancy_response")
             relevancy_response = context_relevancy_chain.invoke({"context": context_texts, "retriever_query": query})
 
-            st.success("✅ **Context Relevancy Evaluated!**")
+            st.subheader("🟥 Context Relevancy Evaluation")
             st.json(relevancy_response['relevancy_response'])
 
         # Step 7: Selecting Relevant Contexts
         with st.spinner("Selecting the most relevant contexts..."):
             relevant_prompt = PromptTemplate(
-                input_variables=["relevancy_response"],
-                template="""Extract contexts with score 0 from the relevancy response.
-
-RELEVANCY RESPONSE:
-{relevancy_response}
-
-RESPONSE (JSON):
-[{{"content": <content number>}}]
-"""
+                input_variables=["relevancy_response"], template=relevant_context_picker_prompt
             )
             pick_relevant_context_chain = LLMChain(llm=llm_judge, prompt=relevant_prompt, output_key="context_number")
             relevant_response = pick_relevant_context_chain.invoke({"relevancy_response": relevancy_response['relevancy_response']})
 
-            st.success("✅ **Relevant Contexts Selected!**")
+            st.subheader("🟦 Pick Relevant Context Chain")
             st.json(relevant_response['context_number'])
 
         # Step 8: Retrieving Context for Response Generation
         with st.spinner("Retrieving final context..."):
             context_prompt = PromptTemplate(
-                input_variables=["context_number", "context"],
-                template="""Extract actual content for the selected context numbers.
-
-CONTEXT NUMBERS:
-{context_number}
-
-CONTENT LIST:
-{context}
-
-RESPONSE (JSON):
-[{{"context_number": <content number>, "relevant_content": "<actual context>"}}]
-"""
+                input_variables=["context_number", "context"], template=response_synth
             )
             relevant_contexts_chain = LLMChain(llm=llm_judge, prompt=context_prompt, output_key="relevant_contexts")
             final_contexts = relevant_contexts_chain.invoke({"context_number": relevant_response['context_number'], "context": context_texts})
 
-            st.success("✅ **Final Contexts Retrieved!**")
+            st.subheader("🟥 Relevant Contexts Extracted")
             st.json(final_contexts['relevant_contexts'])
 
         # Step 9: Generate Final Response
         with st.spinner("Generating the final answer..."):
-            rag_prompt = PromptTemplate(
-                input_variables=["query", "context"],
-                template="""Generate a clear, fact-based response based on the context.
-
-QUERY:
-{query}
-
-CONTEXT:
-{context}
-
-ANSWER:
-"""
+            final_prompt = PromptTemplate(
+                input_variables=["query", "context"], template=rag_prompt
             )
-            response_chain = LLMChain(llm=rag_llm, prompt=rag_prompt, output_key="final_response")
+            response_chain = LLMChain(llm=rag_llm, prompt=final_prompt, output_key="final_response")
             final_response = response_chain.invoke({"query": query, "context": final_contexts['relevant_contexts']})
 
-            st.success("✅ **Final Response Generated!**")
+            st.subheader("🟥 RAG Final Response")
             st.success(final_response['final_response'])
 
         # Step 10: Display Workflow Breakdown
-        st.write("🔍 **Workflow Breakdown:**")
+        st.subheader("🔍 **Workflow Breakdown:**")
         st.json({
             "Context Relevancy Evaluation": relevancy_response["relevancy_response"],
             "Relevant Contexts": relevant_response["context_number"],