DrishtiSharma committed on
Commit 5c410d5 · verified · 1 Parent(s): de11db1

Delete interim.py

Files changed (1)
  1. interim.py +0 -151
interim.py DELETED
@@ -1,151 +0,0 @@
- import streamlit as st
- import os
- import requests
- import tempfile
- import chromadb
- from langchain.document_loaders import PDFPlumberLoader
- from langchain_huggingface import HuggingFaceEmbeddings
- from langchain_experimental.text_splitter import SemanticChunker
- from langchain_chroma import Chroma
- from langchain.chains import LLMChain, SequentialChain
- from langchain.prompts import PromptTemplate
- from langchain_groq import ChatGroq
- from prompts import rag_prompt, relevancy_prompt, relevant_context_picker_prompt, response_synth
-
- # ----------------- Streamlit UI Setup -----------------
- st.set_page_config(page_title="Blah", layout="wide")
- st.image("https://huggingface.co/front/assets/huggingface_logo-noborder.svg", width=150)
- st.title("Blah-1")
-
-
- # ----------------- API Keys -----------------
- os.environ["GROQ_API_KEY"] = st.secrets.get("GROQ_API_KEY", "")
-
- # ----------------- Clear ChromaDB Cache -----------------
- chromadb.api.client.SharedSystemClient.clear_system_cache()
-
- # ----------------- Initialize Session State -----------------
- if "pdf_loaded" not in st.session_state:
-     st.session_state.pdf_loaded = False
- if "chunked" not in st.session_state:
-     st.session_state.chunked = False
- if "vector_created" not in st.session_state:
-     st.session_state.vector_created = False
- if "processed_chunks" not in st.session_state:
-     st.session_state.processed_chunks = None
- if "vector_store" not in st.session_state:
-     st.session_state.vector_store = None
-
- # ----------------- Load Models -----------------
- llm_judge = ChatGroq(model="deepseek-r1-distill-llama-70b")
- rag_llm = ChatGroq(model="mixtral-8x7b-32768")
-
- # ----------------- PDF Selection (Upload or URL) -----------------
- st.sidebar.subheader("📂 PDF Selection")
- pdf_source = st.radio("Choose a PDF source:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
-
- if pdf_source == "Upload a PDF file":
-     uploaded_file = st.sidebar.file_uploader("Upload your PDF file", type=["pdf"])
-     if uploaded_file:
-         st.session_state.pdf_path = "temp.pdf"
-         with open(st.session_state.pdf_path, "wb") as f:
-             f.write(uploaded_file.getbuffer())
-         st.session_state.pdf_loaded = False
-         st.session_state.chunked = False
-         st.session_state.vector_created = False
-
- elif pdf_source == "Enter a PDF URL":
-     pdf_url = st.sidebar.text_input("Enter PDF URL:")
-     if pdf_url and not st.session_state.pdf_loaded:
-         with st.spinner("🔄 Downloading PDF..."):
-             try:
-                 response = requests.get(pdf_url)
-                 if response.status_code == 200:
-                     st.session_state.pdf_path = "temp.pdf"
-                     with open(st.session_state.pdf_path, "wb") as f:
-                         f.write(response.content)
-                     st.session_state.pdf_loaded = False
-                     st.session_state.chunked = False
-                     st.session_state.vector_created = False
-                     st.success("✅ PDF Downloaded Successfully!")
-                 else:
-                     st.error("❌ Failed to download PDF. Check the URL.")
-             except Exception as e:
-                 st.error(f"Error downloading PDF: {e}")
-
- # ----------------- Process PDF -----------------
- if not st.session_state.pdf_loaded and "pdf_path" in st.session_state:
-     with st.spinner("🔄 Processing document... Please wait."):
-         loader = PDFPlumberLoader(st.session_state.pdf_path)
-         docs = loader.load()
-
-         # Embedding Model (kept in session state so later reruns can still reach it)
-         model_name = "nomic-ai/modernbert-embed-base"
-         st.session_state.embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={"device": "cpu"})
-
-         # Split into Chunks
-         text_splitter = SemanticChunker(st.session_state.embedding_model)
-         document_chunks = text_splitter.split_documents(docs)
-
-         # Store chunks in session state
-         st.session_state.processed_chunks = document_chunks
-         st.session_state.pdf_loaded = True
-         st.success("✅ Document processed and chunked successfully!")
-
- # ----------------- Setup Vector Store -----------------
- if not st.session_state.vector_created and st.session_state.processed_chunks:
-     with st.spinner("🔄 Initializing Vector Store..."):
-         vector_store = Chroma(
-             collection_name="deepseek_collection",
-             collection_metadata={"hnsw:space": "cosine"},
-             embedding_function=st.session_state.embedding_model,
-             persist_directory="./chroma_langchain_db"
-         )
-         vector_store.add_documents(st.session_state.processed_chunks)
-         st.session_state.vector_store = vector_store
-         st.session_state.vector_created = True
-         st.success("✅ Vector store initialized successfully!")
-
- # ----------------- Query Input -----------------
- query = st.text_input("🔍 Ask a question about the document:")
-
- if query and st.session_state.vector_store:  # don't retrieve before the vector store exists
-     with st.spinner("🔄 Retrieving relevant context..."):
-         retriever = st.session_state.vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
-         retrieved_docs = retriever.invoke(query)
-         context = [d.page_content for d in retrieved_docs]
-         st.success("✅ Context retrieved successfully!")
-
-     # ----------------- Full SequentialChain Execution -----------------
-     with st.spinner("🔄 Running full pipeline..."):
-         context_relevancy_checker_prompt = PromptTemplate(input_variables=["retriever_query", "context"], template=relevancy_prompt)
-         relevant_prompt = PromptTemplate(input_variables=["relevancy_response"], template=relevant_context_picker_prompt)
-         context_prompt = PromptTemplate(input_variables=["context_number", "context"], template=response_synth)
-         final_prompt = PromptTemplate(input_variables=["query", "context"], template=rag_prompt)
-
-         context_relevancy_chain = LLMChain(llm=llm_judge, prompt=context_relevancy_checker_prompt, output_key="relevancy_response")
-         relevant_context_chain = LLMChain(llm=llm_judge, prompt=relevant_prompt, output_key="context_number")
-         relevant_contexts_chain = LLMChain(llm=llm_judge, prompt=context_prompt, output_key="relevant_contexts")
-         response_chain = LLMChain(llm=rag_llm, prompt=final_prompt, output_key="final_response")
-
-         context_management_chain = SequentialChain(
-             chains=[context_relevancy_chain, relevant_context_chain, relevant_contexts_chain, response_chain],
-             input_variables=["context", "retriever_query", "query"],
-             output_variables=["relevancy_response", "context_number", "relevant_contexts", "final_response"]
-         )
-
-         final_output = context_management_chain.invoke({"context": context, "retriever_query": query, "query": query})
-         st.success("✅ Full pipeline executed successfully!")
-
-     # ----------------- Display All Outputs -----------------
-     st.subheader("🟥 Context Relevancy Evaluation")
-     st.json(final_output["relevancy_response"])
-
-     st.subheader("🟦 Picked Relevant Contexts")
-     st.json(final_output["context_number"])
-
-     st.subheader("🟥 Extracted Relevant Contexts")
-     st.json(final_output["relevant_contexts"])
-
-     st.subheader("🟥 RAG Final Response")
-     st.write(final_output["final_response"])
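
For reference, the chain wiring that interim.py set up can be reduced to a small standalone sketch without the Streamlit and Chroma plumbing. This is a minimal sketch only: the prompt templates below are illustrative placeholders (the originals were imported from prompts.py, which is not part of this diff), and it assumes GROQ_API_KEY is exported and the same LLMChain/SequentialChain APIs the file used are still available.

    # Minimal sketch of the judge -> pick -> extract -> answer pipeline.
    # Placeholder prompts, NOT the originals from prompts.py.
    from langchain.chains import LLMChain, SequentialChain
    from langchain.prompts import PromptTemplate
    from langchain_groq import ChatGroq

    llm_judge = ChatGroq(model="deepseek-r1-distill-llama-70b")
    rag_llm = ChatGroq(model="mixtral-8x7b-32768")

    # Each chain's output_key becomes an input variable for a later chain.
    relevancy = LLMChain(
        llm=llm_judge,
        output_key="relevancy_response",
        prompt=PromptTemplate.from_template(
            "Score each context for relevance to '{retriever_query}':\n{context}"
        ),
    )
    picker = LLMChain(
        llm=llm_judge,
        output_key="context_number",
        prompt=PromptTemplate.from_template(
            "From these scores, list the numbers of the relevant contexts:\n{relevancy_response}"
        ),
    )
    extractor = LLMChain(
        llm=llm_judge,
        output_key="relevant_contexts",
        prompt=PromptTemplate.from_template(
            "Extract contexts {context_number} verbatim from:\n{context}"
        ),
    )
    answerer = LLMChain(
        llm=rag_llm,
        output_key="final_response",
        prompt=PromptTemplate.from_template(
            "Answer '{query}' using only:\n{context}"
        ),
    )

    pipeline = SequentialChain(
        chains=[relevancy, picker, extractor, answerer],
        input_variables=["context", "retriever_query", "query"],
        output_variables=["relevancy_response", "context_number", "relevant_contexts", "final_response"],
    )

    # "context" would normally come from the Chroma retriever.
    result = pipeline.invoke({
        "context": ["chunk one ...", "chunk two ..."],
        "retriever_query": "What does the document say about X?",
        "query": "What does the document say about X?",
    })
    print(result["final_response"])

The design hinges on the output_key of each LLMChain feeding the input variables of the next, which is what lets SequentialChain resolve the four-step flow from a single invoke call.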