DrishtiSharma committed on
Commit
313bc69
·
verified ·
1 Parent(s): 6ad3483

Delete test.py

Files changed (1)
  1. test.py +0 -141
test.py DELETED
@@ -1,141 +0,0 @@
-import streamlit as st
-import os
-import requests
-import chromadb
-from langchain.document_loaders import PDFPlumberLoader
-from langchain_huggingface import HuggingFaceEmbeddings
-from langchain_experimental.text_splitter import SemanticChunker
-from langchain_chroma import Chroma
-from langchain.chains import LLMChain, SequentialChain
-from langchain.prompts import PromptTemplate
-from langchain_groq import ChatGroq
-from prompts import rag_prompt, relevancy_prompt, relevant_context_picker_prompt, response_synth
-
-# ----------------- Streamlit UI Setup -----------------
-st.set_page_config(page_title="Blah", layout="wide")
-st.image("https://huggingface.co/front/assets/huggingface_logo-noborder.svg", width=150)
-st.title("Blah-1")
-
-# ----------------- API Keys -----------------
-os.environ["GROQ_API_KEY"] = st.secrets.get("GROQ_API_KEY", "")
-
-# ----------------- Clear ChromaDB Cache -----------------
-chromadb.api.client.SharedSystemClient.clear_system_cache()
-
-# ----------------- Initialize Session State -----------------
-if "pdf_loaded" not in st.session_state:
-    st.session_state.pdf_loaded = False
-if "chunked" not in st.session_state:
-    st.session_state.chunked = False
-if "vector_created" not in st.session_state:
-    st.session_state.vector_created = False
-if "processed_chunks" not in st.session_state:
-    st.session_state.processed_chunks = None
-if "vector_store" not in st.session_state:
-    st.session_state.vector_store = None
-
-# ----------------- Load Models -----------------
-llm_judge = ChatGroq(model="deepseek-r1-distill-llama-70b")
-rag_llm = ChatGroq(model="mixtral-8x7b-32768")
-
-# Enable verbose logging for debugging
-llm_judge.verbose = True
-rag_llm.verbose = True
-
-# ----------------- PDF Selection (Upload or URL) -----------------
-st.sidebar.subheader("📂 PDF Selection")
-pdf_source = st.radio("Choose a PDF source:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
-
-if pdf_source == "Upload a PDF file":
-    uploaded_file = st.sidebar.file_uploader("Upload your PDF file", type=["pdf"])
-    if uploaded_file:
-        st.session_state.pdf_path = "temp.pdf"
-        with open(st.session_state.pdf_path, "wb") as f:
-            f.write(uploaded_file.getbuffer())
-        st.session_state.pdf_loaded = False
-        st.session_state.chunked = False
-        st.session_state.vector_created = False
-
-elif pdf_source == "Enter a PDF URL":
-    pdf_url = st.sidebar.text_input("Enter PDF URL:")
-    if pdf_url and not st.session_state.pdf_loaded:
-        with st.spinner("🔄 Downloading PDF..."):
-            try:
-                response = requests.get(pdf_url)
-                if response.status_code == 200:
-                    st.session_state.pdf_path = "temp.pdf"
-                    with open(st.session_state.pdf_path, "wb") as f:
-                        f.write(response.content)
-                    st.session_state.pdf_loaded = False
-                    st.session_state.chunked = False
-                    st.session_state.vector_created = False
-                    st.success("✅ PDF Downloaded Successfully!")
-                else:
-                    st.error("❌ Failed to download PDF. Check the URL.")
-            except Exception as e:
-                st.error(f"Error downloading PDF: {e}")
-
-# ----------------- Process PDF -----------------
-if not st.session_state.pdf_loaded and "pdf_path" in st.session_state:
-    with st.spinner("🔄 Processing document... Please wait."):
-        loader = PDFPlumberLoader(st.session_state.pdf_path)
-        docs = loader.load()
-
-        # Embedding Model (HF on CPU)
-        model_name = "nomic-ai/modernbert-embed-base"
-        embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={"device": "cpu"})
-
-        # Prevent unnecessary re-chunking
-        if not st.session_state.chunked:
-            text_splitter = SemanticChunker(embedding_model)
-            document_chunks = text_splitter.split_documents(docs)
-            st.session_state.processed_chunks = document_chunks
-            st.session_state.chunked = True
-
-        st.session_state.pdf_loaded = True
-        st.success("✅ Document processed and chunked successfully!")
-
-# ----------------- Setup Vector Store -----------------
-if not st.session_state.vector_created and st.session_state.processed_chunks:
-    with st.spinner("🔄 Initializing Vector Store..."):
-        st.session_state.vector_store = Chroma(
-            collection_name="deepseek_collection",
-            collection_metadata={"hnsw:space": "cosine"},
-            embedding_function=embedding_model
-        )
-        st.session_state.vector_store.add_documents(st.session_state.processed_chunks)
-        st.session_state.vector_created = True
-        st.success("✅ Vector store initialized successfully!")
-
-# ----------------- Query Input -----------------
-query = st.text_input("🔍 Ask a question about the document:")
-
-if query:
-    with st.spinner("🔄 Retrieving relevant context..."):
-        retriever = st.session_state.vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
-        retrieved_docs = retriever.invoke(query)
-        context = [d.page_content for d in retrieved_docs]
-        st.success("✅ Context retrieved successfully!")
-
-    # ----------------- Full SequentialChain Execution -----------------
-    with st.spinner("🔄 Running full pipeline..."):
-        final_output = SequentialChain(
-            chains=[
-                LLMChain(llm=llm_judge, prompt=PromptTemplate(input_variables=["retriever_query", "context"], template=relevancy_prompt), output_key="relevancy_response"),
-                LLMChain(llm=llm_judge, prompt=PromptTemplate(input_variables=["relevancy_response"], template=relevant_context_picker_prompt), output_key="context_number"),
-                LLMChain(llm=llm_judge, prompt=PromptTemplate(input_variables=["context_number", "context"], template=response_synth), output_key="relevant_contexts"),
-                LLMChain(llm=rag_llm, prompt=PromptTemplate(input_variables=["query", "context"], template=rag_prompt), output_key="final_response")
-            ],
-            input_variables=["context", "retriever_query", "query"],
-            output_variables=["relevancy_response", "context_number", "relevant_contexts", "final_response"]
-        ).invoke({"context": context, "retriever_query": query, "query": query})
-
-    # ----------------- Display All Outputs -----------------
-    st.subheader("🟥 Context Relevancy Evaluation")
-    st.json(final_output["relevancy_response"])
-    st.subheader("🟦 Picked Relevant Contexts")
-    st.json(final_output["context_number"])
-    st.subheader("🟥 Extracted Relevant Contexts")
-    st.json(final_output["relevant_contexts"])
-    st.subheader("🟥 RAG Final Response")
-    st.write(final_output["final_response"])