import streamlit as st
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
import groqapi

# Local path the uploaded PDF is written to so PyPDFLoader can read it from disk.
UPLOADED_PDF_PATH = "uploaded_act.pdf"
# Model identifier handed to load_llama_model().
MODEL_NAME = "llama-3.1-8b-instant"
# st.session_state keys: the built QA chain and the (name, size) of the PDF it
# was built from.
_QA_CHAIN_KEY = "qa_chain"
_QA_SOURCE_KEY = "qa_chain_source"


# Step 1: Initialize Groq API and Llama Model
def load_llama_model(api_key, model_name):
    """Load the Llama model using Groq API.

    Args:
        api_key: Groq API key, passed to ``groqapi.set_api_key``.
        model_name: Model identifier passed to the pipeline constructor.

    Returns:
        Whatever ``HuggingFacePipeline.from_pretrained(model_name)`` returns.
    """
    # NOTE(review): neither `groqapi.set_api_key` nor
    # `HuggingFacePipeline.from_pretrained` could be confirmed against public
    # documentation (the documented constructor appears to be `from_model_id`).
    # Left unchanged because a proper fix needs a different dependency -- confirm.
    groqapi.set_api_key(api_key)
    return HuggingFacePipeline.from_pretrained(model_name)


# Step 2: Load and Process PDF
def process_pdf(pdf_path):
    """Load the PDF at ``pdf_path`` and split it into documents.

    Args:
        pdf_path: Filesystem path of the PDF to load.

    Returns:
        The documents produced by ``PyPDFLoader.load_and_split()``.
    """
    loader = PyPDFLoader(pdf_path)
    return loader.load_and_split()


# Step 3: Create Vector Database
def create_vector_db(documents):
    """Create a FAISS vector database from documents.

    Args:
        documents: Documents to index, e.g. the output of ``process_pdf``.

    Returns:
        The FAISS vector store built from ``documents``.
    """
    embeddings = OpenAIEmbeddings()  # Use OpenAI embeddings for vectorization
    return FAISS.from_documents(documents, embeddings)


# Step 4: Build RAG Pipeline
def build_rag_pipeline(vector_db, llama_model):
    """Build the Retrieval-Augmented Generation (RAG) pipeline.

    Args:
        vector_db: Vector store exposing ``as_retriever``.
        llama_model: LLM object handed to ``RetrievalQA`` as ``llm``.

    Returns:
        A ``RetrievalQA`` chain configured to also return source documents.
    """
    # Similarity search, requesting the 5 closest chunks per query.
    retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
    return RetrievalQA.from_chain_type(
        retriever=retriever,
        llm=llama_model,
        return_source_documents=True,
    )


# Streamlit App
def main():
    """Run the Streamlit app: upload the PDF, load the model, answer questions.

    The QA chain is kept in ``st.session_state`` because ``st.button`` is only
    True on the script run triggered by the click. Without persisting the
    chain, typing a question (which reruns the script) would discard it and
    hide the question box again.
    """
    st.title("KP Universities Act 2016 - Query App")
    st.write("Ask any question about the KP Universities Act 2016.")

    # Step 1: Upload PDF
    uploaded_pdf = st.file_uploader("Upload the KP Universities Act 2016 PDF", type="pdf")
    if not uploaded_pdf:
        # Nothing further can be done until a document is provided.
        return

    # getvalue() returns the whole buffer regardless of the current read
    # position, so repeated script reruns never write an empty file.
    with open(UPLOADED_PDF_PATH, "wb") as f:
        f.write(uploaded_pdf.getvalue())
    documents = process_pdf(UPLOADED_PDF_PATH)
    st.success("PDF Loaded and Processed Successfully!")

    # A chain built for a previously uploaded file must not answer questions
    # about the new one.
    pdf_identity = (uploaded_pdf.name, uploaded_pdf.size)
    if st.session_state.get(_QA_SOURCE_KEY) != pdf_identity:
        st.session_state.pop(_QA_CHAIN_KEY, None)

    # Step 2: Input Groq API Key
    api_key = st.text_input("Enter your Groq API Key", type="password")

    if api_key and st.button("Load Llama Model"):
        try:
            # Load Llama Model, then build Vector DB and QA Chain
            llama_model = load_llama_model(api_key, MODEL_NAME)
            vector_db = create_vector_db(documents)
            st.session_state[_QA_CHAIN_KEY] = build_rag_pipeline(vector_db, llama_model)
            st.session_state[_QA_SOURCE_KEY] = pdf_identity
        except Exception as e:  # top-level UI boundary: report, don't crash
            st.session_state.pop(_QA_CHAIN_KEY, None)
            st.error(f"Error loading model or processing query: {e}")

    qa_chain = st.session_state.get(_QA_CHAIN_KEY)
    if qa_chain is None:
        return
    st.success("Llama Model Loaded Successfully!")

    # Step 3: Ask Questions
    query = st.text_input("Ask a question:")
    if not query:
        return

    try:
        with st.spinner("Fetching Answer..."):
            response = qa_chain({"query": query})
        answer = response["result"]
        source_docs = response["source_documents"]
    except Exception as e:  # top-level UI boundary: report, don't crash
        st.error(f"Error loading model or processing query: {e}")
        return

    # Display Answer and Sources
    st.write("### Answer:")
    st.write(answer)
    st.write("### Sources:")
    for doc in source_docs:
        st.write(f"Source: {doc.metadata.get('source', 'Unknown')}")


if __name__ == "__main__":
    main()