# app.py — Streamlit PDF question-answering app using Groq embeddings.
# (Removed Hugging Face file-viewer residue that was pasted into the source.)
import os
import streamlit as st
from groq import Groq
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from io import BytesIO
# Groq API key.
# SECURITY: never commit a real key to source control — a key that has been
# committed must be considered leaked and rotated immediately. Read it from
# the environment (e.g. a Hugging Face Space secret) instead.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
# Custom embedding adapter so LangChain vector stores can use Groq.
class GroqEmbedding:
    """Adapter exposing the ``embed_documents``/``embed_query`` interface
    that LangChain vector stores (e.g. FAISS) expect.

    NOTE(review): the official ``groq`` Python client does not document
    ``embed_documents`` / ``embed_query`` methods — confirm these calls
    against the installed SDK version before relying on this class.
    """

    def __init__(self, model="groq-embedding-model", api_key=None):
        """Create the adapter.

        Args:
            model: Groq embedding model identifier.
            api_key: Optional explicit API key; falls back to the
                module-level ``GROQ_API_KEY`` when omitted (keeps the
                original call sites working unchanged).
        """
        self.model = model
        self.client = Groq(api_key=api_key or GROQ_API_KEY)

    def embed_documents(self, texts):
        """Return one embedding vector per input text."""
        return self.client.embed_documents(texts, model=self.model)

    def embed_query(self, query):
        """Return the embedding vector for a single query string."""
        return self.client.embed_query(query, model=self.model)
# --- Streamlit UI -----------------------------------------------------------
st.title("PDF Question-Answering with Groq Embeddings")

uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

# Process the uploaded PDF.
if uploaded_file is not None:
    import tempfile

    # BUGFIX: PyPDFLoader expects a filesystem path, not a file-like object,
    # so persist the upload to a temporary file first.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(uploaded_file.read())
        pdf_path = tmp.name

    try:
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
    finally:
        # Always remove the temp file, even if PDF parsing fails.
        os.unlink(pdf_path)

    # Split into overlapping chunks so retrieval matches focused passages.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    split_docs = text_splitter.split_documents(documents)

    # Embed the chunks and index them in an in-memory FAISS store.
    embeddings = GroqEmbedding(model="groq-embedding-model")
    vector_db = FAISS.from_documents(split_docs, embeddings)

    # BUGFIX: RetrievalQA.from_chain_type takes a `retriever`, not a
    # `vectorstore` keyword — the original call raised a TypeError.
    # NOTE(review): llm=None will still fail at query time; a chat model
    # (e.g. ChatGroq) must be supplied here — confirm before deploying.
    qa = RetrievalQA.from_chain_type(
        llm=None,
        chain_type="stuff",
        retriever=vector_db.as_retriever(),
    )

    # User input for querying the PDF content.
    query = st.text_input("Ask a question about the PDF:")
    if query:
        result = qa.run(query)
        st.write("Answer:", result)