Ahmadkhan12 committed
Commit b4717d0 · verified · 1 Parent(s): 4cf02e2

Update app.py

Files changed (1):
  app.py +13 -9
app.py CHANGED
@@ -2,27 +2,27 @@ import os
 import streamlit as st
 from groq import Groq
 from langchain.chains import RetrievalQA
-from langchain.vectorstores import FAISS # This import should work now
+from langchain.vectorstores import FAISS
 from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.llms import OpenAI
+from io import BytesIO
 
 # Set up Groq API key
 GROQ_API_KEY = "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"
 
-# Define a custom embedding class for Groq (since Langchain may not support direct Groq API integration)
+# Define a custom embedding class for Groq
 class GroqEmbedding:
     def __init__(self, model="groq-embedding-model"):
         self.model = model
         self.client = Groq(api_key=GROQ_API_KEY)
 
     def embed_documents(self, texts):
-        # Use Groq's API to generate embeddings
+        # Use Groq's API to generate embeddings for documents
         embeddings = self.client.embed_documents(texts, model=self.model)
         return embeddings
 
     def embed_query(self, query):
-        # Use Groq's API to generate query embedding
+        # Use Groq's API to generate embedding for a query
         return self.client.embed_query(query, model=self.model)
 
 # Streamlit App UI
@@ -32,7 +32,11 @@ uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
 
 # Process the uploaded PDF
 if uploaded_file is not None:
-    loader = PyPDFLoader(uploaded_file)
+    # Convert the uploaded file to a BytesIO object to read it in-memory
+    pdf_file = BytesIO(uploaded_file.read())
+
+    # Load the PDF file with PyPDFLoader
+    loader = PyPDFLoader(pdf_file)
     documents = loader.load()
 
     # Split documents into smaller chunks for better processing
@@ -40,13 +44,13 @@ if uploaded_file is not None:
     split_docs = text_splitter.split_documents(documents)
 
     # Create embeddings using Groq
-    embeddings = GroqEmbedding(api_key=GROQ_API_KEY)
+    embeddings = GroqEmbedding(model="groq-embedding-model") # Use your preferred Groq model
 
-    # Create a FAISS vector store
+    # Create a FAISS vector store with the embeddings
     vector_db = FAISS.from_documents(split_docs, embeddings)
 
     # Initialize the retrieval-based QA system
-    qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", vectorstore=vector_db)
+    qa = RetrievalQA.from_chain_type(llm=None, chain_type="stuff", vectorstore=vector_db)
 
     # User input for querying the PDF content
     query = st.text_input("Ask a question about the PDF:")
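On GroqEmbedding: as far as I know the groq Python client exposes chat completions but no embed_documents or embed_query methods, and Groq does not offer an embeddings endpoint, so both methods above would fail at runtime. One workaround, sketched here assuming sentence-transformers is installed, is to generate the vectors locally with LangChain's HuggingFaceEmbeddings, which FAISS.from_documents accepts directly:

from langchain.embeddings import HuggingFaceEmbeddings

# Hypothetical drop-in replacement for GroqEmbedding: a local
# sentence-transformers model produces the document and query vectors.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

Groq would then be used only for answer generation (see the RetrievalQA note below).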
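On the upload handling: PyPDFLoader takes a file path rather than a file-like object, so passing a BytesIO will most likely still fail. A common Streamlit workaround, shown here as an assumed patch rather than part of the commit, is to spill the upload to a temporary file and load from that path:

import tempfile

if uploaded_file is not None:
    # Persist the in-memory upload to disk because PyPDFLoader expects a path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.read())
        tmp_path = tmp.name

    loader = PyPDFLoader(tmp_path)
    documents = loader.load()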
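The text_splitter itself is constructed outside the visible hunks; a typical configuration looks like the following, where chunk_size and chunk_overlap are illustrative values rather than ones taken from this commit:

# Illustrative splitter settings; tune chunk_size/chunk_overlap for your PDFs.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
split_docs = text_splitter.split_documents(documents)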
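On the QA chain: RetrievalQA.from_chain_type has no vectorstore keyword (it expects a retriever), and llm=None leaves the chain with nothing to generate answers. A sketch of how this could be wired up, assuming the separately installed langchain-groq integration and an illustrative model name:

from langchain_groq import ChatGroq  # assumes the langchain-groq package is installed

# Use a Groq-hosted chat model for generation; the model name is illustrative.
llm = ChatGroq(groq_api_key=GROQ_API_KEY, model_name="llama3-8b-8192")

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_db.as_retriever(),
)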
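Finally, the visible diff stops at the text input; to answer the question, the chain still has to be invoked and the result rendered, along the lines of this sketch (the .run call and layout are assumptions about the rest of app.py):

# Run the QA chain on the user's question and display the answer.
if query:
    answer = qa.run(query)
    st.write(answer)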