Ahmadkhan12 committed on
Commit 180125b · verified · 1 Parent(s): 0d14fed

Update app.py

Files changed (1):
  1. app.py +57 -69
app.py CHANGED
@@ -1,90 +1,78 @@
- import streamlit as st
  import os
  from langchain.vectorstores import FAISS
- from langchain.embeddings import OpenAIEmbeddings
  from langchain.document_loaders import PyPDFLoader
- from groq import Groq
-
- # Set API Keys (Use your provided keys)
- GROQ_API_KEY = "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"
- OPENAI_API_KEY = "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"

- # Initialize API Keys
- def initialize_keys():
-     """Set environment variables for API keys."""
-     os.environ["GROQ_API_KEY"] = GROQ_API_KEY
-     os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

- # Initialize Groq client
- def initialize_groq_client():
-     """Initialize the Groq client with the API key."""
-     return Groq(api_key=GROQ_API_KEY)

- # Generate response using Groq API
- def generate_response(client, query, model_name="llama3-8b-8192"):
-     """Generate a response using Groq's chat completion."""
-     chat_completion = client.chat.completions.create(
-         messages=[
-             {
-                 "role": "user",
-                 "content": query,
-             }
-         ],
-         model=model_name,
-     )
-     return chat_completion.choices[0].message.content
-
- # Load and process PDF
- def process_pdf(pdf_path):
-     """Load and split the PDF into documents."""
-     loader = PyPDFLoader(pdf_path)
-     documents = loader.load_and_split()
      return documents

- # Create FAISS vector database
  def create_vector_db(documents):
-     """Create a FAISS vector database from documents."""
-     embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)  # Use OpenAI API key
      vector_db = FAISS.from_documents(documents, embeddings)
      return vector_db

- # Build RAG pipeline
- def build_rag_pipeline(vector_db, groq_client):
-     """Build the Retrieval-Augmented Generation (RAG) pipeline."""
-     retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
-     return retriever, groq_client

- # Streamlit App
  def main():
-     st.title("KP Universities Act 2016 - Query App")
-     st.write("Ask any question about the KP Universities Act 2016.")

-     # Step 1: Upload PDF
-     uploaded_pdf = st.file_uploader("Upload the KP Universities Act 2016 PDF", type="pdf")
-     if uploaded_pdf:
-         with open("uploaded_act.pdf", "wb") as f:
-             f.write(uploaded_pdf.read())
-         documents = process_pdf("uploaded_act.pdf")
-         st.success("PDF Loaded and Processed Successfully!")

-         # Initialize Groq client
-         initialize_keys()
-         groq_client = initialize_groq_client()

-         # Step 2: Build Vector DB and QA Chain
-         vector_db = create_vector_db(documents)
-         retriever, groq_client = build_rag_pipeline(vector_db, groq_client)

-         # Step 3: Ask Questions
-         query = st.text_input("Ask a question:")
-         if query:
-             with st.spinner("Fetching Answer..."):
-                 # Use Groq API to generate answer
-                 answer = generate_response(groq_client, query)
-
-                 # Display Answer
-                 st.write("### Answer:")
-                 st.write(answer)

  if __name__ == "__main__":
      main()
 
 
  import os
+ import tempfile
+ import streamlit as st
+ from groq import Groq
+ from langchain.embeddings.base import Embeddings  # LangChain's embeddings interface (there is no BaseEmbedding class)
  from langchain.vectorstores import FAISS
  from langchain.document_loaders import PyPDFLoader
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain.llms import OpenAI  # Keep this if you're still using OpenAI for the QA model; otherwise replace it
+ from langchain.prompts import PromptTemplate

+ # Groq API setup for embeddings.
+ # NOTE: this assumes Groq serves an OpenAI-style embeddings endpoint; the Groq
+ # API does not currently document one, so verify before relying on this call.
+ class GroqEmbedding(Embeddings):
+     def __init__(self, api_key: str):
+         self.client = Groq(api_key=api_key)
+
+     def embed_documents(self, texts: list) -> list:
+         embeddings = []
+         for text in texts:
+             response = self.client.embeddings.create(input=text)
+             embeddings.append(response.data[0].embedding)
+         return embeddings
+
+     def embed_query(self, text: str) -> list:
+         # FAISS also embeds the query at search time, so this method is required
+         return self.embed_documents([text])[0]

+ # Load documents from the uploaded PDF file
+ def load_documents(uploaded_file):
+     # PyPDFLoader expects a file path, not a Streamlit UploadedFile,
+     # so persist the upload to a temporary file first
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
+         tmp.write(uploaded_file.read())
+     loader = PyPDFLoader(tmp.name)
+     documents = loader.load()
      return documents

+ # Create FAISS vector database with Groq embeddings
  def create_vector_db(documents):
+     # Read the Groq API key from the environment instead of hard-coding it
+     embeddings = GroqEmbedding(api_key=os.environ["GROQ_API_KEY"])
      vector_db = FAISS.from_documents(documents, embeddings)
      return vector_db

+ # Function to perform QA with the uploaded documents
+ def perform_qa(vector_db, query):
+     # The "stuff" chain injects the retrieved documents via {context},
+     # so the prompt must declare both input variables
+     prompt_template = (
+         "Answer the following question based on the documents:\n\n"
+         "{context}\n\nQuestion: {question}"
+     )
+     prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)
+     qa_chain = load_qa_chain(OpenAI(), chain_type="stuff", prompt=prompt)  # Keep the OpenAI model for QA
+
+     # Query the vector DB to retrieve the most relevant documents
+     results = vector_db.similarity_search(query)
+
+     # Perform QA using the chain
+     answer = qa_chain.run(input_documents=results, question=query)
+     return answer

+ # Streamlit UI setup
  def main():
+     st.title("Document Upload and Question Answering")
+
+     # Upload PDF file
+     uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
+     if uploaded_file:
+         st.write("File uploaded successfully!")
+
+         try:
+             # Load documents from the uploaded PDF
+             documents = load_documents(uploaded_file)
+
+             # Create a vector DB using Groq embeddings
+             vector_db = create_vector_db(documents)
+
+             # User query for Q&A
+             query = st.text_input("Ask a question based on the uploaded document:")
+
+             if query:
+                 # Get the answer for the query
+                 answer = perform_qa(vector_db, query)
+                 st.write("Answer:", answer)
+
+         except Exception as e:
+             st.error(f"Error loading client or processing query: {e}")

  if __name__ == "__main__":
      main()
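
Note: the GroqEmbedding class above leans on an OpenAI-style embeddings endpoint that Groq does not publicly document, so it may fail at runtime. A minimal drop-in that satisfies the same LangChain Embeddings interface with a local model, assuming the sentence-transformers package is installed (the model name here is only an example):

from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer

class LocalEmbedding(Embeddings):
    """Local stand-in for GroqEmbedding, backed by sentence-transformers."""

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts: list) -> list:
        # encode() returns a numpy array; FAISS expects plain lists of floats
        return self.model.encode(texts).tolist()

    def embed_query(self, text: str) -> list:
        return self.embed_documents([text])[0]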
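
Generation, unlike embeddings, is well supported on Groq. If the intent behind the OpenAI import comment is to eventually drop OpenAI, the separate langchain-groq integration package provides ChatGroq, which load_qa_chain accepts in place of OpenAI(). A sketch, assuming `pip install langchain-groq` and GROQ_API_KEY set in the environment:

from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain_groq import ChatGroq

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="Answer the following question based on the documents:\n\n{context}\n\nQuestion: {question}",
)

# ChatGroq reads GROQ_API_KEY from the environment by default
llm = ChatGroq(model="llama3-8b-8192")
qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)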
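
The pipeline can also be smoke-tested outside Streamlit. Assuming the file is saved as app.py, a sample.pdf sits alongside it (a placeholder path), and the API keys are exported:

# Hypothetical smoke test: build the index from a local PDF and ask one question
from langchain.document_loaders import PyPDFLoader
from app import create_vector_db, perform_qa

documents = PyPDFLoader("sample.pdf").load()  # placeholder path
vector_db = create_vector_db(documents)
print(perform_qa(vector_db, "What is this document about?"))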