"""Streamlit RAG Q&A app: upload a PDF, index it into a FAISS vector store,
and answer questions with Mistral-7B via the Hugging Face Hub."""

import os
import uuid
from io import BytesIO

import faiss
import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceHub
from langchain_community.vectorstores import FAISS
from PyPDF2 import PdfReader

# Load environment variables (API credentials are never hard-coded).
load_dotenv()
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
RAG_ACCESS_KEY = os.getenv("RAG_ACCESS_KEY")

# Initialize session state so keys exist on the first script run.
if "vectorstore" not in st.session_state:
    st.session_state.vectorstore = None
if "history" not in st.session_state:
    st.session_state.history = []
if "authenticated" not in st.session_state:
    st.session_state.authenticated = False


def process_input(input_data):
    """Read an uploaded PDF, chunk its text, embed it, and build a FAISS store.

    The store is also persisted under ``vectorstore/faiss_index`` so it can be
    reloaded later. Progress is reported through Streamlit widgets.

    Args:
        input_data: Streamlit ``UploadedFile`` holding the PDF bytes.

    Returns:
        A populated LangChain ``FAISS`` vector store.

    Raises:
        PermissionError: if the ``vectorstore`` directory cannot be created.
    """
    # Create the on-disk directory for the FAISS index.
    try:
        os.makedirs("vectorstore", exist_ok=True)
        # NOTE(review): 0o777 makes the directory world-writable; consider
        # tightening to 0o755 unless a separate service user must write here.
        os.chmod("vectorstore", 0o777)
    except PermissionError as e:
        st.error(f"Failed to create vectorstore directory: {str(e)}")
        raise

    # Progress bar and status panel shown while processing.
    progress_bar = st.progress(0)
    status = st.status("Processing PDF file...", expanded=True)

    # Step 1: read the PDF entirely in memory (no temp file needed).
    status.update(label="Reading PDF file...")
    progress_bar.progress(0.20)
    pdf_reader = PdfReader(BytesIO(input_data.read()))
    documents = ""
    for page in pdf_reader.pages:
        # extract_text() may return None for image-only pages.
        documents += page.extract_text() or ""

    # Step 2: split into overlapping chunks suitable for retrieval.
    status.update(label="Splitting text into chunks...")
    progress_bar.progress(0.40)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = text_splitter.split_text(documents)

    # Step 3: create the sentence-embedding model (CPU inference).
    status.update(label="Creating embeddings...")
    progress_bar.progress(0.60)
    hf_embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={'device': 'cpu'}
    )

    # Step 4: size an empty L2 index from a probe embedding, then wrap it.
    status.update(label="Building vector store...")
    progress_bar.progress(0.80)
    dimension = len(hf_embeddings.embed_query("sample text"))
    index = faiss.IndexFlatL2(dimension)
    vector_store = FAISS(
        embedding_function=hf_embeddings,
        index=index,
        docstore=InMemoryDocstore({}),
        index_to_docstore_id={}
    )

    # Add every chunk with a fresh UUID as its document id.
    uuids = [str(uuid.uuid4()) for _ in range(len(texts))]
    vector_store.add_texts(texts, ids=uuids)

    # Persist the index to disk for later reloads.
    status.update(label="Saving vector store...")
    progress_bar.progress(0.90)
    vector_store.save_local("vectorstore/faiss_index")

    # Mark processing complete in the UI.
    status.update(label="Processing complete!", state="complete")
    progress_bar.progress(1.0)
    return vector_store


def answer_question(vectorstore, query):
    """Answer *query* with retrieval-augmented generation over *vectorstore*.

    Retrieves the top-3 matching chunks and prompts Mistral-7B-Instruct via
    the Hugging Face Hub inference API using a "stuff" RetrievalQA chain.

    Args:
        vectorstore: LangChain FAISS store produced by :func:`process_input`.
        query: The user's question string.

    Returns:
        The generated answer text (portion after the last "Answer:" marker,
        since the hosted model may echo the prompt).
    """
    llm = HuggingFaceHub(
        repo_id="mistralai/Mistral-7B-Instruct-v0.1",
        model_kwargs={"temperature": 0.7, "max_length": 512},
        huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
    )
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    prompt_template = PromptTemplate(
        template="Use the provided context to answer the question concisely:\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:",
        input_variables=["context", "question"]
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": prompt_template}
    )
    result = qa_chain({"query": query})
    # Keep only the text after "Answer:" in case the model echoes the prompt.
    return result["result"].split("Answer:")[-1].strip()


def main():
    """Render the main Q&A page; requires authentication and a processed PDF."""
    # Inject CSS for simple color scheme and clean styling.
    # (The injected block is currently empty — content appears to have been
    # lost; presumably custom styles belong here. TODO confirm.)
    st.markdown(""" """, unsafe_allow_html=True)

    st.title("RAG Q&A App with Mistral AI")
    st.markdown("Welcome to the BSNL RAG App! Upload your PDF files and ask questions with ease.", unsafe_allow_html=True)

    # Guard clauses: both auth and a processed vector store are required.
    if not st.session_state.authenticated:
        st.warning("Please authenticate with your API key in the sidebar.")
        return
    if st.session_state.vectorstore is None:
        st.info("Please upload and process a PDF file in the sidebar.")
        return

    query = st.text_input("Enter your question:")
    if st.button("Submit") and query:
        with st.spinner("Generating answer..."):
            answer = answer_question(st.session_state.vectorstore, query)
            st.session_state.history.append((query, answer))
            st.write("**Answer:**", answer)


# Sidebar: logo, authentication, file upload, and chat history.
# NOTE: this must run AFTER process_input is defined — Streamlit re-executes
# the script top-to-bottom on every interaction, so referencing process_input
# before its def would raise NameError when "Process File" is clicked.
with st.sidebar:
    # BSNL logo (local file with error handling).
    try:
        st.image(
            "bsnl_logo.png",
            width=200
        )
    except FileNotFoundError:
        st.warning("BSNL logo not found. Please ensure 'bsnl_logo.png' exists in the project root.")

    st.header("RAG Control Panel")
    api_key_input = st.text_input("Enter RAG Access Key", type="password")

    # Authentication against the key loaded from the environment.
    if st.button("Authenticate"):
        if api_key_input == RAG_ACCESS_KEY:
            st.session_state.authenticated = True
            st.success("Authentication successful!")
        else:
            st.error("Invalid API key.")

    # File uploader, shown only once authenticated.
    if st.session_state.authenticated:
        input_data = st.file_uploader("Upload a PDF file", type=["pdf"])
        if st.button("Process File") and input_data is not None:
            try:
                vector_store = process_input(input_data)
                st.session_state.vectorstore = vector_store
                st.success("File processed successfully. You can now ask questions.")
            except (PermissionError, OSError) as e:
                st.error(f"File upload failed (Permission or OS error): {str(e)}. Check server permissions or file system access.")
            except Exception as e:
                st.error(f"File upload failed (Unexpected error): {str(e)}. Please try again or check server logs.")

    # Display chat history accumulated in session state.
    st.subheader("Chat History")
    for i, (q, a) in enumerate(st.session_state.history):
        st.write(f"**Q{i+1}:** {q}")
        st.write(f"**A{i+1}:** {a}")
        st.markdown("---")


if __name__ == "__main__":
    main()