DrishtiSharma commited on
Commit
506a700
·
verified ·
1 Parent(s): 5c410d5

Delete lab/lacks_persistence.py

Browse files
Files changed (1) hide show
  1. lab/lacks_persistence.py +0 -151
lab/lacks_persistence.py DELETED
@@ -1,151 +0,0 @@
1
- import os
2
- import requests
3
- import streamlit as st
4
- from langchain.chains import SequentialChain, LLMChain
5
- from langchain.prompts import PromptTemplate
6
- from langchain_groq import ChatGroq
7
- from langchain.document_loaders import PDFPlumberLoader
8
- from langchain_experimental.text_splitter import SemanticChunker
9
- from langchain_huggingface import HuggingFaceEmbeddings
10
- from langchain_chroma import Chroma
11
- from prompts import rag_prompt, relevancy_prompt, relevant_context_picker_prompt, response_synth
12
-
13
-
14
- # Set API Keys
15
- os.environ["GROQ_API_KEY"] = st.secrets.get("GROQ_API_KEY", "")
16
-
17
- # Load LLM models
18
- llm_judge = ChatGroq(model="deepseek-r1-distill-llama-70b")
19
- rag_llm = ChatGroq(model="mixtral-8x7b-32768")
20
-
21
- llm_judge.verbose = True
22
- rag_llm.verbose = True
23
-
24
- st.title("❓")
25
-
26
- # Step 1: Choose PDF Source
27
- #### Initialize pdf_path
28
- pdf_path = None
29
- pdf_source = st.radio("Upload or provide a link to a PDF:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
30
-
31
-
32
- if pdf_source == "Upload a PDF file":
33
- uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
34
- if uploaded_file:
35
- with open("temp.pdf", "wb") as f:
36
- f.write(uploaded_file.getbuffer())
37
- pdf_path = "temp.pdf"
38
-
39
- elif pdf_source == "Enter a PDF URL":
40
- pdf_url = st.text_input("Enter PDF URL:")
41
- if pdf_url:
42
- with st.spinner("Downloading PDF..."):
43
- try:
44
- response = requests.get(pdf_url)
45
- if response.status_code == 200:
46
- with open("temp.pdf", "wb") as f:
47
- f.write(response.content)
48
- pdf_path = "temp.pdf"
49
- st.success("βœ… PDF Downloaded Successfully!")
50
- else:
51
- st.error("❌ Failed to download PDF. Check the URL.")
52
- pdf_path = None
53
- except Exception as e:
54
- st.error(f"Error downloading PDF: {e}")
55
- pdf_path = None
56
- else:
57
- pdf_path = None
58
-
59
- # Step 2: Process PDF
60
- if pdf_path:
61
- with st.spinner("Loading PDF..."):
62
- loader = PDFPlumberLoader(pdf_path)
63
- docs = loader.load()
64
-
65
- st.success(f"βœ… **PDF Loaded!** Total Pages: {len(docs)}")
66
-
67
- # Step 3: Chunking
68
- with st.spinner("Chunking the document..."):
69
- model_name = "nomic-ai/modernbert-embed-base"
70
- embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})
71
- text_splitter = SemanticChunker(embedding_model)
72
- documents = text_splitter.split_documents(docs)
73
-
74
- st.success(f"βœ… **Document Chunked!** Total Chunks: {len(documents)}")
75
-
76
- # Step 4: Setup Vectorstore
77
- with st.spinner("Creating vector store..."):
78
- vector_store = Chroma(
79
- collection_name="deepseek_collection",
80
- collection_metadata={"hnsw:space": "cosine"},
81
- embedding_function=embedding_model
82
- )
83
- vector_store.add_documents(documents)
84
- num_documents = len(vector_store.get()["documents"])
85
-
86
- st.success(f"βœ… **Vector Store Created!** Total documents stored: {num_documents}")
87
-
88
- # Step 5: Query Input
89
- query = st.text_input("πŸ” Enter a Query:")
90
- if query:
91
- with st.spinner("Retrieving relevant contexts..."):
92
- retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
93
- contexts = retriever.invoke(query)
94
- context_texts = [doc.page_content for doc in contexts]
95
-
96
- st.success(f"βœ… **Retrieved {len(context_texts)} Contexts!**")
97
- for i, text in enumerate(context_texts, 1):
98
- st.write(f"**Context {i}:** {text[:500]}...")
99
-
100
- # Step 6: Context Relevancy Checker
101
- with st.spinner("Evaluating context relevancy..."):
102
- context_relevancy_checker_prompt = PromptTemplate(
103
- input_variables=["retriever_query", "context"], template=relevancy_prompt
104
- )
105
- context_relevancy_chain = LLMChain(llm=llm_judge, prompt=context_relevancy_checker_prompt, output_key="relevancy_response")
106
- relevancy_response = context_relevancy_chain.invoke({"context": context_texts, "retriever_query": query})
107
-
108
- st.subheader("πŸŸ₯ Context Relevancy Evaluation")
109
- st.json(relevancy_response['relevancy_response'])
110
-
111
- # Step 7: Selecting Relevant Contexts
112
- with st.spinner("Selecting the most relevant contexts..."):
113
- relevant_prompt = PromptTemplate(
114
- input_variables=["relevancy_response"], template=relevant_context_picker_prompt
115
- )
116
- pick_relevant_context_chain = LLMChain(llm=llm_judge, prompt=relevant_prompt, output_key="context_number")
117
- relevant_response = pick_relevant_context_chain.invoke({"relevancy_response": relevancy_response['relevancy_response']})
118
-
119
- st.subheader("🟦 Pick Relevant Context Chain")
120
- st.json(relevant_response['context_number'])
121
-
122
- # Step 8: Retrieving Context for Response Generation
123
- with st.spinner("Retrieving final context..."):
124
- context_prompt = PromptTemplate(
125
- input_variables=["context_number", "context"], template=response_synth
126
- )
127
- relevant_contexts_chain = LLMChain(llm=llm_judge, prompt=context_prompt, output_key="relevant_contexts")
128
- final_contexts = relevant_contexts_chain.invoke({"context_number": relevant_response['context_number'], "context": context_texts})
129
-
130
- st.subheader("πŸŸ₯ Relevant Contexts Extracted")
131
- st.json(final_contexts['relevant_contexts'])
132
-
133
- # Step 9: Generate Final Response
134
- with st.spinner("Generating the final answer..."):
135
- final_prompt = PromptTemplate(
136
- input_variables=["query", "context"], template=rag_prompt
137
- )
138
- response_chain = LLMChain(llm=rag_llm, prompt=final_prompt, output_key="final_response")
139
- final_response = response_chain.invoke({"query": query, "context": final_contexts['relevant_contexts']})
140
-
141
- st.subheader("πŸŸ₯ RAG Final Response")
142
- st.success(final_response['final_response'])
143
-
144
- # Step 10: Display Workflow Breakdown
145
- st.subheader("πŸ” **Workflow Breakdown:**")
146
- st.json({
147
- "Context Relevancy Evaluation": relevancy_response["relevancy_response"],
148
- "Relevant Contexts": relevant_response["context_number"],
149
- "Extracted Contexts": final_contexts["relevant_contexts"],
150
- "Final Answer": final_response["final_response"]
151
- })