Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,27 +2,27 @@ import os
|
|
2 |
import streamlit as st
|
3 |
from groq import Groq
|
4 |
from langchain.chains import RetrievalQA
|
5 |
-
from langchain.vectorstores import FAISS
|
6 |
from langchain.document_loaders import PyPDFLoader
|
7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
8 |
-
from
|
9 |
|
10 |
# Set up Groq API key
|
11 |
GROQ_API_KEY = "[REDACTED — secret removed; load from the GROQ_API_KEY environment variable instead]"
|
12 |
|
13 |
-
# Define a custom embedding class for Groq
|
14 |
class GroqEmbedding:
|
15 |
def __init__(self, model="groq-embedding-model"):
|
16 |
self.model = model
|
17 |
self.client = Groq(api_key=GROQ_API_KEY)
|
18 |
|
19 |
def embed_documents(self, texts):
|
20 |
-
# Use Groq's API to generate embeddings
|
21 |
embeddings = self.client.embed_documents(texts, model=self.model)
|
22 |
return embeddings
|
23 |
|
24 |
def embed_query(self, query):
|
25 |
-
# Use Groq's API to generate query
|
26 |
return self.client.embed_query(query, model=self.model)
|
27 |
|
28 |
# Streamlit App UI
|
@@ -32,7 +32,11 @@ uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
|
|
32 |
|
33 |
# Process the uploaded PDF
|
34 |
if uploaded_file is not None:
|
35 |
-
|
|
|
|
|
|
|
|
|
36 |
documents = loader.load()
|
37 |
|
38 |
# Split documents into smaller chunks for better processing
|
@@ -40,13 +44,13 @@ if uploaded_file is not None:
|
|
40 |
split_docs = text_splitter.split_documents(documents)
|
41 |
|
42 |
# Create embeddings using Groq
|
43 |
-
embeddings = GroqEmbedding(
|
44 |
|
45 |
-
# Create a FAISS vector store
|
46 |
vector_db = FAISS.from_documents(split_docs, embeddings)
|
47 |
|
48 |
# Initialize the retrieval-based QA system
|
49 |
-
qa = RetrievalQA.from_chain_type(llm=
|
50 |
|
51 |
# User input for querying the PDF content
|
52 |
query = st.text_input("Ask a question about the PDF:")
|
|
|
2 |
import streamlit as st
|
3 |
from groq import Groq
|
4 |
from langchain.chains import RetrievalQA
|
5 |
+
from langchain.vectorstores import FAISS
|
6 |
from langchain.document_loaders import PyPDFLoader
|
7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
8 |
+
from io import BytesIO
|
9 |
|
10 |
# Set up Groq API key.
# Read the key from the environment rather than hardcoding it: a key committed
# to source is a leaked credential and must be rotated.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
|
12 |
|
13 |
+
# Define a custom embedding class for Groq
class GroqEmbedding:
    """Adapter exposing a LangChain-style embedding interface backed by Groq.

    Provides ``embed_documents`` and ``embed_query``, the two methods the
    FAISS vector store calls on an embedding object.

    NOTE(review): the ``groq`` SDK client is not known to expose
    ``embed_documents`` / ``embed_query`` methods — confirm against the
    installed Groq SDK; these calls may raise ``AttributeError`` at runtime.
    """

    def __init__(self, model="groq-embedding-model"):
        # model: identifier of the Groq embedding model to use.
        # NOTE(review): verify "groq-embedding-model" is a real model name.
        self.model = model
        # Client authenticated with the module-level GROQ_API_KEY.
        self.client = Groq(api_key=GROQ_API_KEY)

    def embed_documents(self, texts):
        """Return embeddings for a list of document texts via the Groq API."""
        embeddings = self.client.embed_documents(texts, model=self.model)
        return embeddings

    def embed_query(self, query):
        """Return the embedding for a single query string via the Groq API."""
        return self.client.embed_query(query, model=self.model)
|
27 |
|
28 |
# Streamlit App UI
# NOTE(review): st.title / st.file_uploader (defining `uploaded_file`) sit just
# above this block, outside the visible diff hunk.

# Process the uploaded PDF
if uploaded_file is not None:
    import tempfile

    # PyPDFLoader expects a filesystem path, not a file-like object, so the
    # upload must be persisted to a temporary file first — passing a BytesIO
    # object would fail at load time.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.read())
        pdf_path = tmp.name

    # Load the PDF into LangChain documents (one per page).
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    # Split documents into smaller chunks for better processing.
    # NOTE(review): `text_splitter` is the RecursiveCharacterTextSplitter
    # created immediately above this block, outside the visible diff hunk.
    split_docs = text_splitter.split_documents(documents)

    # Create embeddings using Groq
    embeddings = GroqEmbedding(model="groq-embedding-model")  # Use your preferred Groq model

    # Create a FAISS vector store with the embeddings
    vector_db = FAISS.from_documents(split_docs, embeddings)

    # Initialize the retrieval-based QA system.
    # RetrievalQA.from_chain_type takes a `retriever`, not a `vectorstore`
    # keyword — the original call would raise a validation error.
    # NOTE(review): `llm=None` is not a usable LLM; plug in a real Groq-backed
    # chat model here (e.g. langchain_groq.ChatGroq) before querying.
    qa = RetrievalQA.from_chain_type(
        llm=None,  # TODO: replace with a real LLM instance
        chain_type="stuff",
        retriever=vector_db.as_retriever(),
    )

# User input for querying the PDF content
query = st.text_input("Ask a question about the PDF:")
|