import streamlit as st
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
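# Assumed dependencies (the original pins no versions):
#   pip install streamlit langchain langchain-groq faiss-cpu pypdf openai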
# Step 1: Initialize Groq API and Llama Model
def load_llama_model(api_key, model_name):
    """Load a Groq-hosted Llama chat model via LangChain's ChatGroq wrapper."""
    return ChatGroq(groq_api_key=api_key, model_name=model_name)
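# Quick sanity check (hypothetical key shown; invoke() is the standard
# LangChain chat-model call):
#   model = load_llama_model("gsk_...", "llama-3.1-8b-instant")
#   print(model.invoke("Say hello").content)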
# Step 2: Load and Process PDF
def process_pdf(pdf_path):
    """Load the PDF and split it into chunked documents."""
    loader = PyPDFLoader(pdf_path)
    documents = loader.load_and_split()
    return documents
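# load_and_split() chunks with LangChain's default RecursiveCharacterTextSplitter;
# each returned Document carries `source` and `page` metadata from PyPDFLoader.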
# Step 3: Create Vector Database
def create_vector_db(documents):
    """Create a FAISS vector database from documents."""
    # OpenAI embeddings require OPENAI_API_KEY in the environment,
    # separate from the Groq key used for generation.
    embeddings = OpenAIEmbeddings()
    vector_db = FAISS.from_documents(documents, embeddings)
    return vector_db
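# The index can be probed directly before wiring it into a chain, e.g.:
#   db = create_vector_db(documents)
#   for doc in db.similarity_search("university senate powers", k=3):
#       print(doc.metadata.get("page"), doc.page_content[:80])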
# Step 4: Build RAG Pipeline
def build_rag_pipeline(vector_db, llama_model):
    """Build the Retrieval-Augmented Generation (RAG) pipeline."""
    retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
    qa_chain = RetrievalQA.from_chain_type(
        llm=llama_model,
        retriever=retriever,
        return_source_documents=True
    )
    return qa_chain
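# RetrievalQA defaults to chain_type="stuff": all k=5 retrieved chunks are
# stuffed into one prompt, so very large k values can overflow the model's
# context window.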
# Streamlit App
def main():
    st.title("KP Universities Act 2016 - Query App")
    st.write("Ask any question about the KP Universities Act 2016.")

    # Step 1: Upload PDF
    uploaded_pdf = st.file_uploader("Upload the KP Universities Act 2016 PDF", type="pdf")
    if uploaded_pdf:
        with open("uploaded_act.pdf", "wb") as f:
            f.write(uploaded_pdf.read())
        documents = process_pdf("uploaded_act.pdf")
        st.success("PDF Loaded and Processed Successfully!")

        # Step 2: Input Groq API Key and build the pipeline
        api_key = st.text_input("Enter your Groq API Key", type="password")
        model_name = "llama-3.1-8b-instant"
        if api_key and st.button("Load Llama Model"):
            try:
                llama_model = load_llama_model(api_key, model_name)
                vector_db = create_vector_db(documents)
                # Keep the chain in session state: st.button is only True on
                # the rerun triggered by the click, and later query reruns
                # would otherwise lose the chain.
                st.session_state.qa_chain = build_rag_pipeline(vector_db, llama_model)
                st.success("Llama Model Loaded Successfully!")
            except Exception as e:
                st.error(f"Error loading model: {e}")

    # Step 3: Ask Questions (available once the chain has been built)
    if "qa_chain" in st.session_state:
        query = st.text_input("Ask a question:")
        if query:
            try:
                with st.spinner("Fetching Answer..."):
                    response = st.session_state.qa_chain({"query": query})
                answer = response["result"]
                source_docs = response["source_documents"]

                # Display Answer and Sources
                st.write("### Answer:")
                st.write(answer)
                st.write("### Sources:")
                for doc in source_docs:
                    st.write(f"Source: {doc.metadata.get('source', 'Unknown')}")
            except Exception as e:
                st.error(f"Error processing query: {e}")

if __name__ == "__main__":
    main()