import streamlit as st
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
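# Assumed dependencies (the original pins no versions):
#   pip install streamlit langchain langchain-groq faiss-cpu pypdf openai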
# Step 1: Initialize Groq API and Llama Model
def load_llama_model(api_key, model_name):
    """Load a Groq-hosted Llama chat model via LangChain's ChatGroq wrapper."""
    return ChatGroq(groq_api_key=api_key, model_name=model_name)
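# Quick sanity check (hypothetical key shown; invoke() is the standard
# LangChain chat-model call):
#   model = load_llama_model("gsk_...", "llama-3.1-8b-instant")
#   print(model.invoke("Say hello").content)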
# Step 2: Load and Process PDF
def process_pdf(pdf_path):
    """Load the PDF and split it into chunked documents."""
    loader = PyPDFLoader(pdf_path)
    documents = loader.load_and_split()
    return documents
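# load_and_split() chunks with LangChain's default RecursiveCharacterTextSplitter;
# each returned Document carries `source` and `page` metadata from PyPDFLoader.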
# Step 3: Create Vector Database
def create_vector_db(documents):
    """Create a FAISS vector database from documents."""
    # OpenAI embeddings require OPENAI_API_KEY in the environment,
    # separate from the Groq key used for generation.
    embeddings = OpenAIEmbeddings()
    vector_db = FAISS.from_documents(documents, embeddings)
    return vector_db
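# The index can be probed directly before wiring it into a chain, e.g.:
#   db = create_vector_db(documents)
#   for doc in db.similarity_search("university senate powers", k=3):
#       print(doc.metadata.get("page"), doc.page_content[:80])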
# Step 4: Build RAG Pipeline
def build_rag_pipeline(vector_db, llama_model):
    """Build the Retrieval-Augmented Generation (RAG) pipeline."""
    retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
    qa_chain = RetrievalQA.from_chain_type(
        llm=llama_model,
        retriever=retriever,
        return_source_documents=True
    )
    return qa_chain
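# RetrievalQA defaults to chain_type="stuff": all k=5 retrieved chunks are
# stuffed into one prompt, so very large k values can overflow the model's
# context window.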
# Streamlit App
def main():
    st.title("KP Universities Act 2016 - Query App")
    st.write("Ask any question about the KP Universities Act 2016.")

    # Step 1: Upload PDF
    uploaded_pdf = st.file_uploader("Upload the KP Universities Act 2016 PDF", type="pdf")
    if uploaded_pdf:
        with open("uploaded_act.pdf", "wb") as f:
            f.write(uploaded_pdf.read())
        documents = process_pdf("uploaded_act.pdf")
        st.success("PDF Loaded and Processed Successfully!")

        # Step 2: Input Groq API Key and build the pipeline
        api_key = st.text_input("Enter your Groq API Key", type="password")
        model_name = "llama-3.1-8b-instant"
        if api_key and st.button("Load Llama Model"):
            try:
                llama_model = load_llama_model(api_key, model_name)
                vector_db = create_vector_db(documents)
                # Keep the chain in session state: st.button is only True on
                # the rerun triggered by the click, and later query reruns
                # would otherwise lose the chain.
                st.session_state.qa_chain = build_rag_pipeline(vector_db, llama_model)
                st.success("Llama Model Loaded Successfully!")
            except Exception as e:
                st.error(f"Error loading model: {e}")

    # Step 3: Ask Questions (available once the chain has been built)
    if "qa_chain" in st.session_state:
        query = st.text_input("Ask a question:")
        if query:
            try:
                with st.spinner("Fetching Answer..."):
                    response = st.session_state.qa_chain({"query": query})
                answer = response["result"]
                source_docs = response["source_documents"]

                # Display Answer and Sources
                st.write("### Answer:")
                st.write(answer)
                st.write("### Sources:")
                for doc in source_docs:
                    st.write(f"Source: {doc.metadata.get('source', 'Unknown')}")
            except Exception as e:
                st.error(f"Error processing query: {e}")

if __name__ == "__main__":
    main()