import streamlit as st
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
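
# Assumed dependencies (inferred from the imports above): streamlit,
# langchain, langchain-groq, faiss-cpu, pypdf, openai.
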
# Step 1: Initialize the Llama Model via the Groq API
def load_llama_model(api_key, model_name):
    """Load a Llama chat model served by Groq."""
    # ChatGroq (from the langchain-groq package) wraps Groq's hosted chat models.
    return ChatGroq(groq_api_key=api_key, model_name=model_name)
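
# Example usage (hypothetical key; model names follow Groq's catalog):
#   llm = load_llama_model("gsk_...", "llama-3.1-8b-instant")
#   llm.invoke("Hello")  # returns an AIMessage
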
# Step 2: Load and Process PDF
def process_pdf(pdf_path):
"""Load and split the PDF into documents."""
loader = PyPDFLoader(pdf_path)
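    # load_and_split() chunks page text with LangChain's default splitter
    # (RecursiveCharacterTextSplitter in recent versions); pass a custom
    # text_splitter here to tune chunk size and overlap.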
documents = loader.load_and_split()
return documents
# Step 3: Create Vector Database
def create_vector_db(documents):
"""Create a FAISS vector database from documents."""
embeddings = OpenAIEmbeddings() # Use OpenAI embeddings for vectorization
vector_db = FAISS.from_documents(documents, embeddings)
return vector_db
# Step 4: Build RAG Pipeline
def build_rag_pipeline(vector_db, llama_model):
"""Build the Retrieval-Augmented Generation (RAG) pipeline."""
retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
qa_chain = RetrievalQA.from_chain_type(
retriever=retriever,
llm=llama_model,
return_source_documents=True
)
return qa_chain
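
# Example query against a built chain (hypothetical instances; calling the
# chain with a dict is the classic LangChain style, .invoke in newer versions):
#   response = qa_chain({"query": "When does the Act take effect?"})
#   response["result"], response["source_documents"]
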
# Streamlit App
def main():
    st.title("KP Universities Act 2016 - Query App")
    st.write("Ask any question about the KP Universities Act 2016.")

    # Step 1: Upload PDF
    uploaded_pdf = st.file_uploader("Upload the KP Universities Act 2016 PDF", type="pdf")
    if uploaded_pdf:
        with open("uploaded_act.pdf", "wb") as f:
            f.write(uploaded_pdf.read())
        st.session_state["documents"] = process_pdf("uploaded_act.pdf")
        st.success("PDF Loaded and Processed Successfully!")

    # Step 2: Input Groq API Key and build the RAG pipeline
    api_key = st.text_input("Enter your Groq API Key", type="password")
    model_name = "llama-3.1-8b-instant"
    if api_key and st.button("Load Llama Model"):
        if "documents" not in st.session_state:
            st.error("Please upload the PDF before loading the model.")
        else:
            try:
                llama_model = load_llama_model(api_key, model_name)
                vector_db = create_vector_db(st.session_state["documents"])
                # Keep the chain in session_state so it survives Streamlit's
                # re-run of the script on every widget interaction.
                st.session_state["qa_chain"] = build_rag_pipeline(vector_db, llama_model)
                st.success("Llama Model Loaded Successfully!")
            except Exception as e:
                st.error(f"Error loading model: {e}")

    # Step 3: Ask Questions (only once the chain has been built)
    if "qa_chain" in st.session_state:
        query = st.text_input("Ask a question:")
        if query:
            with st.spinner("Fetching Answer..."):
                try:
                    response = st.session_state["qa_chain"]({"query": query})
                except Exception as e:
                    st.error(f"Error processing query: {e}")
                else:
                    # Display Answer and Sources
                    st.write("### Answer:")
                    st.write(response["result"])
                    st.write("### Sources:")
                    for doc in response["source_documents"]:
                        st.write(f"Source: {doc.metadata.get('source', 'Unknown')}")
if __name__ == "__main__":
main()