Ahmadkhan12 committed on
Commit 180125b · verified · 1 Parent(s): 0d14fed

Update app.py

Files changed (1):
  1. app.py +57 -69
app.py CHANGED
@@ -1,90 +1,78 @@
- import streamlit as st
  import os
  from langchain.vectorstores import FAISS
- from langchain.embeddings import OpenAIEmbeddings
  from langchain.document_loaders import PyPDFLoader
- from groq import Groq
-
- # Set API Keys (Use your provided keys)
- GROQ_API_KEY = "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"
- OPENAI_API_KEY = "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"

- # Initialize API Keys
- def initialize_keys():
-     """Set environment variables for API keys."""
-     os.environ["GROQ_API_KEY"] = GROQ_API_KEY
-     os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

- # Initialize Groq client
- def initialize_groq_client():
-     """Initialize the Groq client with the API key."""
-     return Groq(api_key=GROQ_API_KEY)

- # Generate response using Groq API
- def generate_response(client, query, model_name="llama3-8b-8192"):
-     """Generate a response using Groq's chat completion."""
-     chat_completion = client.chat.completions.create(
-         messages=[
-             {
-                 "role": "user",
-                 "content": query,
-             }
-         ],
-         model=model_name,
-     )
-     return chat_completion.choices[0].message.content
-
- # Load and process PDF
- def process_pdf(pdf_path):
-     """Load and split the PDF into documents."""
-     loader = PyPDFLoader(pdf_path)
-     documents = loader.load_and_split()
      return documents

- # Create FAISS vector database
  def create_vector_db(documents):
-     """Create a FAISS vector database from documents."""
-     embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)  # Use OpenAI API key
      vector_db = FAISS.from_documents(documents, embeddings)
      return vector_db

- # Build RAG pipeline
- def build_rag_pipeline(vector_db, groq_client):
-     """Build the Retrieval-Augmented Generation (RAG) pipeline."""
-     retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
-     return retriever, groq_client

- # Streamlit App
  def main():
-     st.title("KP Universities Act 2016 - Query App")
-     st.write("Ask any question about the KP Universities Act 2016.")

-     # Step 1: Upload PDF
-     uploaded_pdf = st.file_uploader("Upload the KP Universities Act 2016 PDF", type="pdf")
-     if uploaded_pdf:
-         with open("uploaded_act.pdf", "wb") as f:
-             f.write(uploaded_pdf.read())
-         documents = process_pdf("uploaded_act.pdf")
-         st.success("PDF Loaded and Processed Successfully!")

-         # Initialize Groq client
-         initialize_keys()
-         groq_client = initialize_groq_client()

-         # Step 2: Build Vector DB and QA Chain
-         vector_db = create_vector_db(documents)
-         retriever, groq_client = build_rag_pipeline(vector_db, groq_client)

-         # Step 3: Ask Questions
-         query = st.text_input("Ask a question:")
-         if query:
-             with st.spinner("Fetching Answer..."):
-                 # Use Groq API to generate answer
-                 answer = generate_response(groq_client, query)
-
-                 # Display Answer
-                 st.write("### Answer:")
-                 st.write(answer)

  if __name__ == "__main__":
      main()
 
 
  import os
+ import tempfile
+ import streamlit as st
+ from groq import Groq
+ from langchain.embeddings.base import Embeddings  # LangChain's embeddings interface (there is no BaseEmbedding class)
  from langchain.vectorstores import FAISS
  from langchain.document_loaders import PyPDFLoader
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain.llms import OpenAI  # Keep this if you're still using OpenAI for the QA model; otherwise replace it
+ from langchain.prompts import PromptTemplate

+ # Groq API setup for embeddings.
+ # NOTE: this assumes Groq serves an OpenAI-style embeddings endpoint; the Groq
+ # API does not currently document one, so verify before relying on this call.
+ class GroqEmbedding(Embeddings):
+     def __init__(self, api_key: str):
+         self.client = Groq(api_key=api_key)
+
+     def embed_documents(self, texts: list) -> list:
+         embeddings = []
+         for text in texts:
+             response = self.client.embeddings.create(input=text)
+             embeddings.append(response.data[0].embedding)
+         return embeddings
+
+     def embed_query(self, text: str) -> list:
+         # FAISS also embeds the query at search time, so this method is required
+         return self.embed_documents([text])[0]

+ # Load documents from the uploaded PDF file
+ def load_documents(uploaded_file):
+     # PyPDFLoader expects a file path, not a Streamlit UploadedFile,
+     # so persist the upload to a temporary file first
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
+         tmp.write(uploaded_file.read())
+     loader = PyPDFLoader(tmp.name)
+     documents = loader.load()
      return documents

+ # Create FAISS vector database with Groq embeddings
  def create_vector_db(documents):
+     # Read the Groq API key from the environment instead of hard-coding it
+     embeddings = GroqEmbedding(api_key=os.environ["GROQ_API_KEY"])
      vector_db = FAISS.from_documents(documents, embeddings)
      return vector_db

+ # Function to perform QA with the uploaded documents
+ def perform_qa(vector_db, query):
+     # The "stuff" chain injects the retrieved documents via {context},
+     # so the prompt must declare both input variables
+     prompt_template = (
+         "Answer the following question based on the documents:\n\n"
+         "{context}\n\nQuestion: {question}"
+     )
+     prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)
+     qa_chain = load_qa_chain(OpenAI(), chain_type="stuff", prompt=prompt)  # Keep the OpenAI model for QA
+
+     # Query the vector DB to retrieve the most relevant documents
+     results = vector_db.similarity_search(query)
+
+     # Perform QA using the chain
+     answer = qa_chain.run(input_documents=results, question=query)
+     return answer

+ # Streamlit UI setup
  def main():
+     st.title("Document Upload and Question Answering")
+
+     # Upload PDF file
+     uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
+     if uploaded_file:
+         st.write("File uploaded successfully!")
+
+         try:
+             # Load documents from the uploaded PDF
+             documents = load_documents(uploaded_file)
+
+             # Create a vector DB using Groq embeddings
+             vector_db = create_vector_db(documents)
+
+             # User query for Q&A
+             query = st.text_input("Ask a question based on the uploaded document:")
+
+             if query:
+                 # Get the answer for the query
+                 answer = perform_qa(vector_db, query)
+                 st.write("Answer:", answer)
+
+         except Exception as e:
+             st.error(f"Error loading client or processing query: {e}")

  if __name__ == "__main__":
      main()
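
Note: the GroqEmbedding class above leans on an OpenAI-style embeddings endpoint that Groq does not publicly document, so it may fail at runtime. A minimal drop-in that satisfies the same LangChain Embeddings interface with a local model, assuming the sentence-transformers package is installed (the model name here is only an example):

from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer

class LocalEmbedding(Embeddings):
    """Local stand-in for GroqEmbedding, backed by sentence-transformers."""

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts: list) -> list:
        # encode() returns a numpy array; FAISS expects plain lists of floats
        return self.model.encode(texts).tolist()

    def embed_query(self, text: str) -> list:
        return self.embed_documents([text])[0]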
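
Generation, unlike embeddings, is well supported on Groq. If the intent behind the OpenAI import comment is to eventually drop OpenAI, the separate langchain-groq integration package provides ChatGroq, which load_qa_chain accepts in place of OpenAI(). A sketch, assuming `pip install langchain-groq` and GROQ_API_KEY set in the environment:

from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain_groq import ChatGroq

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="Answer the following question based on the documents:\n\n{context}\n\nQuestion: {question}",
)

# ChatGroq reads GROQ_API_KEY from the environment by default
llm = ChatGroq(model="llama3-8b-8192")
qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)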
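
The pipeline can also be smoke-tested outside Streamlit. Assuming the file is saved as app.py, a sample.pdf sits alongside it (a placeholder path), and the API keys are exported:

# Hypothetical smoke test: build the index from a local PDF and ask one question
from langchain.document_loaders import PyPDFLoader
from app import create_vector_db, perform_qa

documents = PyPDFLoader("sample.pdf").load()  # placeholder path
vector_db = create_vector_db(documents)
print(perform_qa(vector_db, "What is this document about?"))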