# Source: Hugging Face Spaces - Ahmadkhan12 / Rag-university-act-2016 (Space status: Sleeping)
# File size: 3,286 Bytes - revision 20fe924


import streamlit as st
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
import groqapi

# Step 1: Initialize Groq API and Llama Model
def load_llama_model(api_key, model_name):
    """Register the Groq API key and build the LLM wrapper for ``model_name``.

    Args:
        api_key: Key passed to ``groqapi.set_api_key``.
        model_name: Identifier passed to ``HuggingFacePipeline.from_pretrained``.

    Returns:
        The object returned by ``HuggingFacePipeline.from_pretrained``.
    """
    # NOTE(review): the key is registered on the ``groqapi`` module, but the
    # model object is created through ``HuggingFacePipeline`` and never receives
    # that key. Confirm that this really routes requests to Groq and that
    # ``from_pretrained`` exists on the installed LangChain version.
    groqapi.set_api_key(api_key)
    llm = HuggingFacePipeline.from_pretrained(model_name)
    return llm

# Step 2: Load and Process PDF
def process_pdf(pdf_path):
    """Read the PDF at ``pdf_path`` and return its split documents.

    Args:
        pdf_path: Filesystem path handed to ``PyPDFLoader``.

    Returns:
        The result of ``PyPDFLoader(pdf_path).load_and_split()``.
    """
    return PyPDFLoader(pdf_path).load_and_split()

# Step 3: Create Vector Database
def create_vector_db(documents):
    """Embed ``documents`` and index them in a FAISS vector store.

    Args:
        documents: Documents forwarded to ``FAISS.from_documents``.

    Returns:
        The FAISS store built from ``documents``.
    """
    # NOTE(review): OpenAIEmbeddings() is constructed with no arguments, so any
    # credentials presumably come from the environment — confirm deployment
    # provides them; the app itself only asks for a Groq key.
    embedder = OpenAIEmbeddings()
    return FAISS.from_documents(documents, embedder)

# Step 4: Build RAG Pipeline
def build_rag_pipeline(vector_db, llama_model):
    """Wire a similarity retriever and the LLM into a ``RetrievalQA`` chain.

    Args:
        vector_db: Vector store exposing ``as_retriever``.
        llama_model: LLM object passed to the chain as ``llm``.

    Returns:
        A ``RetrievalQA`` chain configured to also return source documents.
    """
    # Similarity search, asking the store for 5 results per query.
    top_k_retriever = vector_db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 5},
    )
    return RetrievalQA.from_chain_type(
        llm=llama_model,
        retriever=top_k_retriever,
        return_source_documents=True,
    )

# Streamlit App
def main():
    """Render the Streamlit UI: upload the Act PDF, load the model, answer questions.

    Streamlit re-executes this function from the top on every widget
    interaction, and ``st.button`` is True only on the single run that follows
    the click. Anything that must outlive that run (the parsed documents and
    the QA chain) is therefore kept in ``st.session_state``, and the question
    box is rendered outside the button branch so it stays visible while the
    user types and submits a query.
    """
    st.title("KP Universities Act 2016 - Query App")
    st.write("Ask any question about the KP Universities Act 2016.")

    # Step 1: Upload PDF
    uploaded_pdf = st.file_uploader("Upload the KP Universities Act 2016 PDF", type="pdf")
    if not uploaded_pdf:
        return

    state = st.session_state

    # getvalue() returns the whole buffer regardless of the current read
    # position, unlike read(), which can come back empty on a later rerun.
    pdf_bytes = uploaded_pdf.getvalue()
    pdf_key = (uploaded_pdf.name, hash(pdf_bytes))

    # Parse the PDF only when the upload actually changed; a new upload also
    # invalidates any chain built from the previous document.
    if state.get("pdf_key") != pdf_key:
        # NOTE(review): fixed filename in the working directory — concurrent
        # sessions would overwrite each other's upload; confirm single-user use.
        with open("uploaded_act.pdf", "wb") as f:
            f.write(pdf_bytes)
        state["documents"] = process_pdf("uploaded_act.pdf")
        state["pdf_key"] = pdf_key
        state.pop("qa_chain", None)
    documents = state["documents"]
    st.success("PDF Loaded and Processed Successfully!")

    # Step 2: Input Groq API Key
    api_key = st.text_input("Enter your Groq API Key", type="password")
    model_name = "llama-3.1-8b-instant"

    if api_key and st.button("Load Llama Model"):
        try:
            # Load Llama Model
            llama_model = load_llama_model(api_key, model_name)
            st.success("Llama Model Loaded Successfully!")

            # Build Vector DB and QA Chain, and persist the chain across reruns
            vector_db = create_vector_db(documents)
            state["qa_chain"] = build_rag_pipeline(vector_db, llama_model)
        except Exception as e:
            st.error(f"Error loading model or processing query: {e}")

    qa_chain = state.get("qa_chain")
    if qa_chain is None:
        # Model not loaded yet (or loading failed): nothing to query against.
        return

    # Step 3: Ask Questions
    query = st.text_input("Ask a question:")
    if not query:
        return

    try:
        with st.spinner("Fetching Answer..."):
            response = qa_chain({"query": query})
        answer = response["result"]
        source_docs = response["source_documents"]

        # Display Answer and Sources
        st.write("### Answer:")
        st.write(answer)

        st.write("### Sources:")
        for doc in source_docs:
            st.write(f"Source: {doc.metadata.get('source', 'Unknown')}")
    except Exception as e:
        st.error(f"Error loading model or processing query: {e}")

# Start the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()