Ahmadkhan12 committed on
Commit
20fe924
·
verified ·
1 Parent(s): 96b2871

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.vectorstores import FAISS
3
+ from langchain.embeddings import OpenAIEmbeddings
4
+ from langchain.document_loaders import PyPDFLoader
5
+ from langchain.llms import HuggingFacePipeline
6
+ from langchain.chains import RetrievalQA
7
+ import groqapi
8
+
9
+ # Step 1: Initialize Groq API and Llama Model
10
def load_llama_model(api_key, model_name):
    """Register the Groq API key and build the LLM wrapper for ``model_name``.

    Args:
        api_key: Key handed to ``groqapi.set_api_key``.
        model_name: Identifier forwarded to
            ``HuggingFacePipeline.from_pretrained``.

    Returns:
        Whatever ``HuggingFacePipeline.from_pretrained(model_name)`` returns.
    """
    # NOTE(review): confirm that a `groqapi` module exposing `set_api_key`
    # is actually installed in the deployment environment.
    groqapi.set_api_key(api_key)

    # NOTE(review): verify that `HuggingFacePipeline` really offers
    # `from_pretrained`; the constructor usually documented for this class is
    # `from_model_id` — TODO confirm against the pinned LangChain version.
    llm = HuggingFacePipeline.from_pretrained(model_name)
    return llm
14
+
15
+ # Step 2: Load and Process PDF
16
def process_pdf(pdf_path):
    """Read the PDF at ``pdf_path`` and return it as split documents.

    Args:
        pdf_path: Filesystem path passed to ``PyPDFLoader``.

    Returns:
        The result of ``PyPDFLoader(pdf_path).load_and_split()``.
    """
    return PyPDFLoader(pdf_path).load_and_split()
21
+
22
+ # Step 3: Create Vector Database
23
def create_vector_db(documents):
    """Index ``documents`` in a FAISS store using OpenAI embeddings.

    Args:
        documents: Documents to embed, as produced by ``process_pdf``.

    Returns:
        The store built by ``FAISS.from_documents``.
    """
    # NOTE(review): `OpenAIEmbeddings()` is created without arguments, so it
    # presumably picks up OpenAI credentials from the environment, while the
    # UI only asks for a Groq key — confirm the deployment provides them.
    embedder = OpenAIEmbeddings()
    return FAISS.from_documents(documents, embedder)
28
+
29
+ # Step 4: Build RAG Pipeline
30
def build_rag_pipeline(vector_db, llama_model):
    """Assemble the Retrieval-Augmented Generation (RAG) question-answering chain.

    Args:
        vector_db: Vector store whose ``as_retriever`` supplies the context.
        llama_model: Language model passed to the chain as ``llm``.

    Returns:
        A ``RetrievalQA`` chain configured to also return its source documents.
    """
    # Similarity search limited to the five closest chunks per question.
    top_k_retriever = vector_db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 5},
    )
    return RetrievalQA.from_chain_type(
        llm=llama_model,
        retriever=top_k_retriever,
        return_source_documents=True,
    )
39
+
40
+ # Streamlit App
41
def main():
    """Render the Streamlit UI: upload the PDF, build the QA chain, answer questions.

    Streamlit re-executes this function from the top on every widget
    interaction, and ``st.button`` is only ``True`` on the single rerun caused
    by the click. The QA chain is therefore stored in ``st.session_state`` and
    the question box is rendered outside the button branch; otherwise typing a
    question would trigger a rerun in which the chain (and the question box
    itself) no longer exist.
    """
    st.title("KP Universities Act 2016 - Query App")
    st.write("Ask any question about the KP Universities Act 2016.")

    # Step 1: Upload PDF
    uploaded_pdf = st.file_uploader("Upload the KP Universities Act 2016 PDF", type="pdf")
    documents = None
    if uploaded_pdf:
        with open("uploaded_act.pdf", "wb") as f:
            # getvalue() returns the full content regardless of the current
            # stream position, so it stays correct across reruns.
            f.write(uploaded_pdf.getvalue())
        documents = process_pdf("uploaded_act.pdf")
        st.success("PDF Loaded and Processed Successfully!")

    # Step 2: Input Groq API Key
    api_key = st.text_input("Enter your Groq API Key", type="password")
    model_name = "llama-3.1-8b-instant"

    if api_key and st.button("Load Llama Model"):
        if documents is None:
            # Without this guard `documents` would be unbound and the user
            # would see a misleading model-loading error.
            st.warning("Please upload the PDF before loading the model.")
        else:
            try:
                # Load Llama Model
                llama_model = load_llama_model(api_key, model_name)
                st.success("Llama Model Loaded Successfully!")

                # Build Vector DB and QA Chain; persist the chain so it
                # survives the reruns triggered by later widget input.
                vector_db = create_vector_db(documents)
                st.session_state["qa_chain"] = build_rag_pipeline(vector_db, llama_model)
            except Exception as e:
                # Top-level UI boundary: report the failure instead of crashing.
                st.error(f"Error loading model or processing query: {e}")

    # Step 3: Ask Questions (only once a chain has been built in this session).
    # The chain is rebuilt only when "Load Llama Model" is clicked again, e.g.
    # after uploading a different PDF.
    if "qa_chain" not in st.session_state:
        return
    qa_chain = st.session_state["qa_chain"]

    query = st.text_input("Ask a question:")
    if not query:
        return

    try:
        with st.spinner("Fetching Answer..."):
            response = qa_chain({"query": query})
        answer = response["result"]
        source_docs = response["source_documents"]
    except Exception as e:
        st.error(f"Error loading model or processing query: {e}")
        return

    # Display Answer and Sources
    st.write("### Answer:")
    st.write(answer)

    st.write("### Sources:")
    for doc in source_docs:
        st.write(f"Source: {doc.metadata.get('source', 'Unknown')}")


if __name__ == "__main__":
    main()