Ahmadkhan12 committed on
Commit
030a55c
·
verified ·
1 Parent(s): 78a4d31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -67
app.py CHANGED
@@ -1,78 +1,60 @@
1
  import os
2
  import streamlit as st
 
3
  from groq import Groq
4
- from langchain.embeddings import BaseEmbedding
5
  from langchain.vectorstores import FAISS
6
  from langchain.document_loaders import PyPDFLoader
7
- from langchain.chains.question_answering import load_qa_chain
8
- from langchain.llms import OpenAI # Keep this if you're still using OpenAI for QA model, otherwise replace it
9
- from langchain.prompts import PromptTemplate
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- # Groq API setup for embeddings
12
- class GroqEmbedding(BaseEmbedding):
13
- def __init__(self, api_key: str):
14
- self.client = Groq(api_key=api_key)
15
 
16
- def embed_documents(self, texts: list) -> list:
17
- embeddings = []
18
- for text in texts:
19
- response = self.client.embeddings.create(input=text)
20
- embeddings.append(response['data'])
21
- return embeddings
22
 
23
- # Load documents from uploaded PDF file
24
- def load_documents(uploaded_file):
25
  loader = PyPDFLoader(uploaded_file)
26
  documents = loader.load()
27
- return documents
28
-
29
- # Create FAISS vector database with Groq embeddings
30
- def create_vector_db(documents):
31
- # Use Groq embeddings
32
- embeddings = GroqEmbedding(api_key="your-groq-api-key") # Pass your Groq API key
33
- vector_db = FAISS.from_documents(documents, embeddings)
34
- return vector_db
35
-
36
- # Function to perform QA with the uploaded documents
37
- def perform_qa(vector_db, query):
38
- # Set up the prompt and model for QA
39
- prompt_template = "Answer the following question based on the documents: {question}"
40
- prompt = PromptTemplate(input_variables=["question"], template=prompt_template)
41
- qa_chain = load_qa_chain(OpenAI(), chain_type="stuff", prompt=prompt) # Keep OpenAI model for QA
42
-
43
- # Query the vector DB to retrieve the most relevant documents
44
- results = vector_db.similarity_search(query)
45
-
46
- # Perform QA using the chain
47
- answer = qa_chain.run(input_documents=results, question=query)
48
- return answer
49
-
50
- # Streamlit UI setup
51
- def main():
52
- st.title("Document Upload and Question Answering")
53
-
54
- # Upload PDF file
55
- uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
56
- if uploaded_file:
57
- st.write("File uploaded successfully!")
58
-
59
- try:
60
- # Load documents from the uploaded PDF
61
- documents = load_documents(uploaded_file)
62
-
63
- # Create a vector DB using Groq embeddings
64
- vector_db = create_vector_db(documents)
65
-
66
- # User query for Q&A
67
- query = st.text_input("Ask a question based on the uploaded document:")
68
-
69
- if query:
70
- # Get the answer for the query
71
- answer = perform_qa(vector_db, query)
72
- st.write("Answer:", answer)
73
-
74
- except Exception as e:
75
- st.error(f"Error loading client or processing query: {e}")
76
 
77
- if __name__ == "__main__":
78
- main()
 
1
  import os
2
  import streamlit as st
3
+ from langchain.embeddings import Embedding
4
  from groq import Groq
5
+ from langchain.chains import RetrievalQA
6
  from langchain.vectorstores import FAISS
7
  from langchain.document_loaders import PyPDFLoader
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+ from langchain.llms import OpenAI
10
+ from langchain.agents import initialize_agent
11
+ from langchain.agents import Tool
12
+
13
+ # Set up Groq API
14
+ groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
15
+
16
+ # Define a custom embedding class for Groq
17
+ class GroqEmbedding(Embedding):
18
+ def __init__(self, model="groq-embedding-model", api_key=None):
19
+ self.model = model
20
+ self.client = Groq(api_key=api_key or os.getenv("GROQ_API_KEY"))
21
+
22
+ def embed_documents(self, texts):
23
+ # Use Groq's API to generate embeddings
24
+ embeddings = self.client.embed_documents(texts, model=self.model)
25
+ return embeddings
26
+
27
+ def embed_query(self, query):
28
+ # Use Groq's API to generate query embedding
29
+ return self.client.embed_query(query, model=self.model)
30
 
31
+ # Streamlit App UI
32
+ st.title("PDF Question-Answering with Groq Embeddings")
 
 
33
 
34
+ uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
 
 
 
 
 
35
 
36
+ # Process the uploaded PDF
37
+ if uploaded_file is not None:
38
  loader = PyPDFLoader(uploaded_file)
39
  documents = loader.load()
40
+
41
+ # Split documents into smaller chunks for better processing
42
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
43
+ split_docs = text_splitter.split_documents(documents)
44
+
45
+ # Create embeddings using Groq
46
+ embeddings = GroqEmbedding(api_key=os.getenv("GROQ_API_KEY"))
47
+
48
+ # Create a FAISS vector store
49
+ vector_db = FAISS.from_documents(split_docs, embeddings)
50
+
51
+ # Initialize the retrieval-based QA system
52
+ qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", vectorstore=vector_db)
53
+
54
+ # User input for querying the PDF content
55
+ query = st.text_input("Ask a question about the PDF:")
56
+
57
+ if query:
58
+ result = qa.run(query)
59
+ st.write("Answer:", result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60