Spaces:
Sleeping
Sleeping
import os | |
import streamlit as st | |
from groq import Groq | |
from langchain.chains import RetrievalQA | |
from langchain.vectorstores import FAISS | |
from langchain.document_loaders import PyPDFLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from io import BytesIO | |
# Set up Groq API key | |
GROQ_API_KEY = "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" | |
# Define a custom embedding class for Groq | |
class GroqEmbedding: | |
def __init__(self, model="groq-embedding-model"): | |
self.model = model | |
self.client = Groq(api_key=GROQ_API_KEY) | |
def embed_documents(self, texts): | |
# Use Groq's API to generate embeddings for documents | |
embeddings = self.client.embed_documents(texts, model=self.model) | |
return embeddings | |
def embed_query(self, query): | |
# Use Groq's API to generate embedding for a query | |
return self.client.embed_query(query, model=self.model) | |
# Streamlit App UI | |
st.title("PDF Question-Answering with Groq Embeddings") | |
uploaded_file = st.file_uploader("Upload a PDF", type="pdf") | |
# Process the uploaded PDF | |
if uploaded_file is not None: | |
# Convert the uploaded file to a BytesIO object to read it in-memory | |
pdf_file = BytesIO(uploaded_file.read()) | |
# Load the PDF file with PyPDFLoader | |
loader = PyPDFLoader(pdf_file) | |
documents = loader.load() | |
# Split documents into smaller chunks for better processing | |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) | |
split_docs = text_splitter.split_documents(documents) | |
# Create embeddings using Groq | |
embeddings = GroqEmbedding(model="groq-embedding-model") # Use your preferred Groq model | |
# Create a FAISS vector store with the embeddings | |
vector_db = FAISS.from_documents(split_docs, embeddings) | |
# Initialize the retrieval-based QA system | |
qa = RetrievalQA.from_chain_type(llm=None, chain_type="stuff", vectorstore=vector_db) | |
# User input for querying the PDF content | |
query = st.text_input("Ask a question about the PDF:") | |
if query: | |
result = qa.run(query) | |
st.write("Answer:", result) | |