DrishtiSharma committed on
Commit 6ad3483 · verified · 1 Parent(s): b90007a

Delete rough.py

Files changed (1)
  1. rough.py +0 -156
rough.py DELETED
@@ -1,156 +0,0 @@
- import streamlit as st
- import os
- import requests
- import chromadb
- from langchain.document_loaders import PDFPlumberLoader
- from langchain_huggingface import HuggingFaceEmbeddings
- from langchain_experimental.text_splitter import SemanticChunker
- from langchain_chroma import Chroma
- from langchain.chains import LLMChain, SequentialChain
- from langchain.prompts import PromptTemplate
- from langchain_groq import ChatGroq
- from prompts import rag_prompt, relevancy_prompt, relevant_context_picker_prompt, response_synth
-
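- # NOTE: `prompts` is a local module that is not part of this commit. The chain
- # wiring below assumes each template string declares exactly the placeholders
- # named in its PromptTemplate (e.g. relevancy_prompt is assumed to contain
- # "{retriever_query}" and "{context}"); a hypothetical sketch of the module
- # follows the diff.
-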
- # ----------------- Streamlit UI Setup -----------------
- st.set_page_config(page_title="Blah", layout="wide")
- st.title("Blah-1")
-
- # ----------------- API Keys -----------------
- os.environ["GROQ_API_KEY"] = st.secrets.get("GROQ_API_KEY", "")
-
- # ----------------- Ensure Vector Store Directory Exists -----------------
- os.makedirs("./chroma_langchain_db", exist_ok=True)
-
- # ----------------- Clear ChromaDB Cache -----------------
- chromadb.api.client.SharedSystemClient.clear_system_cache()
-
- # ----------------- Initialize Session State -----------------
- if "pdf_loaded" not in st.session_state:
-     st.session_state.pdf_loaded = False
- if "chunked" not in st.session_state:
-     st.session_state.chunked = False
- if "vector_created" not in st.session_state:
-     st.session_state.vector_created = False
- if "processed_chunks" not in st.session_state:
-     st.session_state.processed_chunks = None
- if "vector_store" not in st.session_state:
-     st.session_state.vector_store = None
-
- # ----------------- Load Models -------------------
- llm_judge = ChatGroq(model="deepseek-r1-distill-llama-70b")
- rag_llm = ChatGroq(model="mixtral-8x7b-32768")
-
- # Enable verbose logging for debugging
- llm_judge.verbose = True
- rag_llm.verbose = True
-
- # Embedding model (HF, CPU). Created at module level so it is available on
- # every Streamlit rerun, including runs that skip the PDF-processing branch.
- embedding_model = HuggingFaceEmbeddings(
-     model_name="nomic-ai/modernbert-embed-base",
-     model_kwargs={"device": "cpu"},
- )
-
- # ----------------- PDF Selection (Upload or URL) -----------------
- st.sidebar.subheader("📂 PDF Selection")
- pdf_source = st.sidebar.radio("Choose a PDF source:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
-
- if pdf_source == "Upload a PDF file":
-     uploaded_file = st.sidebar.file_uploader("Upload your PDF file", type=["pdf"])
-     # Reset the pipeline only when a new file arrives, so reruns triggered by
-     # other widgets do not reprocess the same document.
-     if uploaded_file and st.session_state.get("uploaded_name") != uploaded_file.name:
-         st.session_state.pdf_path = "temp.pdf"
-         with open(st.session_state.pdf_path, "wb") as f:
-             f.write(uploaded_file.getbuffer())
-         st.session_state.uploaded_name = uploaded_file.name
-         st.session_state.pdf_loaded = False
-         st.session_state.chunked = False
-         st.session_state.vector_created = False
-
- elif pdf_source == "Enter a PDF URL":
-     pdf_url = st.sidebar.text_input("Enter PDF URL:")
-     if pdf_url and not st.session_state.pdf_loaded:
-         with st.spinner("🔄 Downloading PDF..."):
-             try:
-                 response = requests.get(pdf_url, timeout=60)
-                 if response.status_code == 200:
-                     st.session_state.pdf_path = "temp.pdf"
-                     with open(st.session_state.pdf_path, "wb") as f:
-                         f.write(response.content)
-                     st.session_state.pdf_loaded = False
-                     st.session_state.chunked = False
-                     st.session_state.vector_created = False
-                     st.success("✅ PDF Downloaded Successfully!")
-                 else:
-                     st.error("❌ Failed to download PDF. Check the URL.")
-             except Exception as e:
-                 st.error(f"Error downloading PDF: {e}")
-
- # ----------------- Process PDF -----------------
- if not st.session_state.pdf_loaded and "pdf_path" in st.session_state:
-     with st.spinner("🔄 Processing document... Please wait."):
-         loader = PDFPlumberLoader(st.session_state.pdf_path)
-         docs = loader.load()
-
-         # Split into semantically coherent chunks (SemanticChunker embeds the
-         # text to find breakpoints, so cost grows with document length)
-         text_splitter = SemanticChunker(embedding_model)
-         document_chunks = text_splitter.split_documents(docs)
-
-         # Store chunks in session state
-         st.session_state.processed_chunks = document_chunks
-         st.session_state.pdf_loaded = True
-         st.success("✅ Document processed and chunked successfully!")
-
- # ----------------- Setup Vector Store -----------------
- if not st.session_state.vector_created and st.session_state.processed_chunks:
-     with st.spinner("🔄 Initializing Vector Store..."):
-         vector_store = Chroma(
-             collection_name="deepseek_collection",
-             collection_metadata={"hnsw:space": "cosine"},
-             embedding_function=embedding_model,
-             persist_directory="./chroma_langchain_db",
-         )
-         vector_store.add_documents(st.session_state.processed_chunks)
-         st.session_state.vector_store = vector_store
-         st.session_state.vector_created = True
-         st.success("✅ Vector store initialized successfully!")
-
- # ----------------- Query Input -----------------
- query = st.text_input("🔍 Ask a question about the document:")
-
- if query:
-     # Guard against querying before a document has been processed
-     if st.session_state.vector_store is None:
-         st.warning("Please load and process a PDF before asking a question.")
-         st.stop()
-     with st.spinner("🔄 Retrieving relevant context..."):
-         retriever = st.session_state.vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
-         retrieved_docs = retriever.invoke(query)
-         context = [d.page_content for d in retrieved_docs]
-         st.success("✅ Context retrieved successfully!")
-
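-     # The four-stage pipeline below threads data by matching each chain's
-     # output_key to the next prompt's input variable:
-     #   context -> relevancy_response -> context_number -> relevant_contexts -> final_response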
-     # ----------------- Full SequentialChain Execution -----------------
-     with st.spinner("🔄 Running full pipeline..."):
-         context_relevancy_checker_prompt = PromptTemplate(input_variables=["retriever_query", "context"], template=relevancy_prompt)
-         relevant_prompt = PromptTemplate(input_variables=["relevancy_response"], template=relevant_context_picker_prompt)
-         context_prompt = PromptTemplate(input_variables=["context_number", "context"], template=response_synth)
-         final_prompt = PromptTemplate(input_variables=["query", "context"], template=rag_prompt)
-
-         context_relevancy_chain = LLMChain(llm=llm_judge, prompt=context_relevancy_checker_prompt, output_key="relevancy_response")
-         relevant_context_chain = LLMChain(llm=llm_judge, prompt=relevant_prompt, output_key="context_number")
-         relevant_contexts_chain = LLMChain(llm=llm_judge, prompt=context_prompt, output_key="relevant_contexts")
-         response_chain = LLMChain(llm=rag_llm, prompt=final_prompt, output_key="final_response")
-
-         context_management_chain = SequentialChain(
-             chains=[context_relevancy_chain, relevant_context_chain, relevant_contexts_chain, response_chain],
-             input_variables=["context", "retriever_query", "query"],
-             output_variables=["relevancy_response", "context_number", "relevant_contexts", "final_response"]
-         )
-
-         final_output = context_management_chain.invoke({"context": context, "retriever_query": query, "query": query})
-         st.success("✅ Full pipeline executed successfully!")
-
-     # ----------------- Display All Outputs (Formatted) -----------------
-     st.markdown("### 🟥 Context Relevancy Evaluation")
-     st.json(final_output["relevancy_response"])
-
-     st.markdown("### 🟦 Picked Relevant Contexts")
-     st.json(final_output["context_number"])
-
-     st.markdown("### 🟥 Extracted Relevant Contexts")
-     st.json(final_output["relevant_contexts"])
-
-     st.markdown("## 🟥 RAG Final Response")
-     st.write(final_output["final_response"])
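
Note: the script imports rag_prompt, relevancy_prompt, relevant_context_picker_prompt, and response_synth from a local prompts module that is not part of this commit. A minimal, hypothetical sketch of that module is below; the template wording is invented for illustration, and only the placeholder names are taken from the PromptTemplate declarations in the diff.

# prompts.py: hypothetical reconstruction, not the module from this repo.
# Each placeholder must match the input_variables declared in rough.py.

relevancy_prompt = (
    "Query: {retriever_query}\n"
    "For each numbered context below, judge it RELEVANT or IRRELEVANT and reply as JSON.\n"
    "Contexts: {context}"
)

relevant_context_picker_prompt = (
    "From this relevancy assessment, list only the numbers of the RELEVANT contexts:\n"
    "{relevancy_response}"
)

response_synth = (
    "Using the selected context numbers {context_number}, quote the matching "
    "passages verbatim from:\n{context}"
)

rag_prompt = (
    "Answer the question using only the context provided.\n"
    "Question: {query}\n"
    "Context: {context}\n"
    "Answer:"
)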