import os
import tempfile

import streamlit as st
from groq import Groq
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# ChatGroq supplies the answering LLM; requires the langchain-groq package
from langchain_groq import ChatGroq
# Read the Groq API key from the environment instead of hardcoding a secret
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# Define a custom embedding class for Groq.
# Note: this assumes a Groq-hosted embeddings endpoint; the Groq SDK does not
# document embed_documents/embed_query methods, so treat this class as a
# sketch and swap in a supported embedding provider if these calls fail.
class GroqEmbedding:
    def __init__(self, model="groq-embedding-model"):
        self.model = model
        self.client = Groq(api_key=GROQ_API_KEY)

    def embed_documents(self, texts):
        # Generate embeddings for a batch of document chunks
        return self.client.embed_documents(texts, model=self.model)

    def embed_query(self, query):
        # Generate an embedding for a single query string
        return self.client.embed_query(query, model=self.model)
# Streamlit App UI
st.title("PDF Question-Answering with Groq Embeddings")
uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
# Process the uploaded PDF
if uploaded_file is not None:
    # PyPDFLoader expects a file path, not a file-like object, so write the
    # upload to a temporary file and load from that path
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.read())
        tmp_path = tmp.name

    # Load the PDF from the temporary path
    loader = PyPDFLoader(tmp_path)
    documents = loader.load()
    # Split documents into smaller chunks for better retrieval
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    split_docs = text_splitter.split_documents(documents)

    # Create embeddings using Groq ("groq-embedding-model" is a placeholder;
    # substitute whichever embedding model you actually use)
    embeddings = GroqEmbedding(model="groq-embedding-model")

    # Build a FAISS vector store over the chunk embeddings
    vector_db = FAISS.from_documents(split_docs, embeddings)
    # Initialize the retrieval-based QA chain. RetrievalQA.from_chain_type
    # takes a retriever (not a vectorstore= argument) and needs a real LLM;
    # llm=None would fail at query time. The model name below is an
    # assumption; use any chat model available on Groq.
    llm = ChatGroq(groq_api_key=GROQ_API_KEY, model_name="llama3-8b-8192")
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_db.as_retriever(),
    )

    # User input for querying the PDF content
    query = st.text_input("Ask a question about the PDF:")
    if query:
        result = qa.run(query)
        st.write("Answer:", result)
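
# How to run (a sketch, assuming this file is saved as app.py and the
# GROQ_API_KEY environment variable is set):
#   pip install streamlit groq langchain langchain-groq faiss-cpu pypdf
#   streamlit run app.py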