|
import os |
|
import streamlit as st |
|
from groq import Groq |
|
from langchain.chains import RetrievalQA |
|
from langchain.vectorstores import FAISS |
|
from langchain.document_loaders import PyPDFLoader |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from io import BytesIO |
|
|
|
|
|
GROQ_API_KEY = "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" |
|
|
|
|
|
class GroqEmbedding: |
|
def __init__(self, model="groq-embedding-model"): |
|
self.model = model |
|
self.client = Groq(api_key=GROQ_API_KEY) |
|
|
|
def embed_documents(self, texts): |
|
|
|
embeddings = self.client.embed_documents(texts, model=self.model) |
|
return embeddings |
|
|
|
def embed_query(self, query): |
|
|
|
return self.client.embed_query(query, model=self.model) |
|
|
|
|
|
st.title("PDF Question-Answering with Groq Embeddings") |
|
|
|
uploaded_file = st.file_uploader("Upload a PDF", type="pdf") |
|
|
|
|
|
if uploaded_file is not None: |
|
|
|
pdf_file = BytesIO(uploaded_file.read()) |
|
|
|
|
|
loader = PyPDFLoader(pdf_file) |
|
documents = loader.load() |
|
|
|
|
|
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) |
|
split_docs = text_splitter.split_documents(documents) |
|
|
|
|
|
embeddings = GroqEmbedding(model="groq-embedding-model") |
|
|
|
|
|
vector_db = FAISS.from_documents(split_docs, embeddings) |
|
|
|
|
|
qa = RetrievalQA.from_chain_type(llm=None, chain_type="stuff", vectorstore=vector_db) |
|
|
|
|
|
query = st.text_input("Ask a question about the PDF:") |
|
|
|
if query: |
|
result = qa.run(query) |
|
st.write("Answer:", result) |
|
|