DrishtiSharma committed on
Commit
3c4e62e
·
verified ·
1 Parent(s): d38433c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -0
app.py CHANGED
@@ -48,3 +48,137 @@ elif pdf_source == "Enter a PDF URL":
48
  else:
49
  pdf_path = None
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  else:
49
  pdf_path = None
50
 
51
# Step 2: Process PDF — run the full RAG pipeline only once a PDF is available.
# NOTE(review): this fragment relies on `pdf_path`, `st`, `PDFPlumberLoader`,
# `HuggingFaceEmbeddings`, `SemanticChunker`, `Chroma`, `PromptTemplate`,
# `LLMChain`, `llm_judge`, and `rag_llm` being defined earlier in the file
# (imports/model setup are outside this chunk) — confirm before running.
if pdf_path:
    # Load every page of the PDF into LangChain Document objects.
    with st.spinner("Loading PDF..."):
        loader = PDFPlumberLoader(pdf_path)
        docs = loader.load()

    st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")

    # Step 3: Chunking — semantic splitting so chunk boundaries follow meaning
    # rather than a fixed character count. Embeddings are kept on CPU.
    with st.spinner("Chunking the document..."):
        model_name = "nomic-ai/modernbert-embed-base"
        embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})

        text_splitter = SemanticChunker(embedding_model)
        documents = text_splitter.split_documents(docs)

    st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")

    # Step 4: Setup Vectorstore — cosine space for similarity search over the
    # chunk embeddings.
    with st.spinner("Creating vector store..."):
        vector_store = Chroma(
            collection_name="deepseek_collection",
            collection_metadata={"hnsw:space": "cosine"},
            embedding_function=embedding_model
        )
        vector_store.add_documents(documents)

    st.success("✅ **Vector Store Created!**")

    # Step 5: Query Input
    query = st.text_input("🔍 Enter a Query:")
    if query:
        # Retrieve the top-5 chunks most similar to the query.
        with st.spinner("Retrieving relevant contexts..."):
            retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
            contexts = retriever.invoke(query)
            context_texts = [doc.page_content for doc in contexts]

        st.success(f"✅ **Retrieved {len(context_texts)} Contexts!**")
        for i, text in enumerate(context_texts, 1):
            st.write(f"**Context {i}:** {text[:500]}...")

        # Step 6: Context Relevancy Checker — the judge LLM scores each
        # retrieved chunk: 1 = helps answer the query, 0 = does not.
        with st.spinner("Evaluating context relevancy..."):
            relevancy_prompt = PromptTemplate(
                input_variables=["retriever_query", "context"],
                template="""You are an expert judge. Assign relevancy scores (0 or 1) for each context to answer the query.

CONTEXT LIST:
{context}

QUERY:
{retriever_query}

RESPONSE (JSON):
[{{"content": 1, "score": <0 or 1>, "reasoning": "<explanation>"}},
 {{"content": 2, "score": <0 or 1>, "reasoning": "<explanation>"}},
...]"""
            )
            context_relevancy_chain = LLMChain(llm=llm_judge, prompt=relevancy_prompt, output_key="relevancy_response")
            relevancy_response = context_relevancy_chain.invoke({"context": context_texts, "retriever_query": query})

        st.success("✅ **Context Relevancy Evaluated!**")
        st.json(relevancy_response['relevancy_response'])

        # Step 7: Selecting Relevant Contexts
        # BUG FIX: the original prompt asked for contexts "with score 0",
        # which selects the chunks the judge marked IRRELEVANT. Per the Step 6
        # rubric, relevant contexts carry score 1 — extract those instead.
        with st.spinner("Selecting the most relevant contexts..."):
            relevant_prompt = PromptTemplate(
                input_variables=["relevancy_response"],
                template="""Extract contexts with score 1 from the relevancy response.

RELEVANCY RESPONSE:
{relevancy_response}

RESPONSE (JSON):
[{{"content": <content number>}}]
"""
            )
            pick_relevant_context_chain = LLMChain(llm=llm_judge, prompt=relevant_prompt, output_key="context_number")
            relevant_response = pick_relevant_context_chain.invoke({"relevancy_response": relevancy_response['relevancy_response']})

        st.success("✅ **Relevant Contexts Selected!**")
        st.json(relevant_response['context_number'])

        # Step 8: Retrieving Context for Response Generation — map the selected
        # context numbers back to the actual chunk texts.
        with st.spinner("Retrieving final context..."):
            context_prompt = PromptTemplate(
                input_variables=["context_number", "context"],
                template="""Extract actual content for the selected context numbers.

CONTEXT NUMBERS:
{context_number}

CONTENT LIST:
{context}

RESPONSE (JSON):
[{{"context_number": <content number>, "relevant_content": "<actual context>"}}]
"""
            )
            relevant_contexts_chain = LLMChain(llm=llm_judge, prompt=context_prompt, output_key="relevant_contexts")
            final_contexts = relevant_contexts_chain.invoke({"context_number": relevant_response['context_number'], "context": context_texts})

        st.success("✅ **Final Contexts Retrieved!**")
        st.json(final_contexts['relevant_contexts'])

        # Step 9: Generate Final Response — answer generation uses the RAG LLM,
        # grounded only in the selected contexts.
        with st.spinner("Generating the final answer..."):
            rag_prompt = PromptTemplate(
                input_variables=["query", "context"],
                template="""Generate a clear, fact-based response based on the context.

QUERY:
{query}

CONTEXT:
{context}

ANSWER:
"""
            )
            response_chain = LLMChain(llm=rag_llm, prompt=rag_prompt, output_key="final_response")
            final_response = response_chain.invoke({"query": query, "context": final_contexts['relevant_contexts']})

        st.success("✅ **Final Response Generated!**")
        st.success(final_response['final_response'])

        # Step 10: Display Workflow Breakdown — surface every intermediate
        # artifact so the pipeline can be audited from the UI.
        st.write("🔍 **Workflow Breakdown:**")
        st.json({
            "Context Relevancy Evaluation": relevancy_response["relevancy_response"],
            "Relevant Contexts": relevant_response["context_number"],
            "Extracted Contexts": final_contexts["relevant_contexts"],
            "Final Answer": final_response["final_response"]
        })