import streamlit as st
import os
from dotenv import load_dotenv
from transformers import pipeline
from io import BytesIO
from pypdf import PdfReader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
# main.py is expected to provide get_index_for_pdf; fall back to an inline
# sketch if it is missing so the app stays runnable on its own.
try:
    from main import get_index_for_pdf
except ImportError:
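    # Fallback sketch (assumption): this mirrors what get_index_for_pdf is
    # believed to do -- extract text per page, attach 'filename' and 'page'
    # metadata, embed the chunks, and build a FAISS store. The real
    # implementation in main.py may differ.
    def get_index_for_pdf(pdf_bytes_list, filenames, model_name):
        docs, metadatas = [], []
        for pdf_bytes, filename in zip(pdf_bytes_list, filenames):
            reader = PdfReader(BytesIO(pdf_bytes))
            for page_number, page in enumerate(reader.pages, start=1):
                text = page.extract_text() or ""
                if text.strip():
                    docs.append(text)
                    metadatas.append({"filename": filename, "page": page_number})
        embeddings = HuggingFaceEmbeddings(model_name=model_name)
        return FAISS.from_texts(docs, embeddings, metadatas=metadatas)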
# Initialize session state for the app
if "vectordb" not in st.session_state:
st.session_state["vectordb"] = None
if "prompt" not in st.session_state:
st.session_state["prompt"] = [{"role": "system", "content": "none"}]
# Set the title for the Streamlit app
st.title("RAG Enhance Chatbot")
# Hugging Face API Key (avoid hardcoding for production)
load_dotenv()
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
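# The .env file is expected to define this key, e.g. (placeholder value):
#   HUGGINGFACE_API_KEY=hf_xxxxxxxxxxxx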
# Optional sidebar model picker (currently disabled):
# model_name = st.sidebar.selectbox(
# "Choose a Hugging Face Model",
# [
# "sentence-transformers/all-mpnet-base-v2",
# "sentence-transformers/all-MiniLM-L6-v2",
# "msmarco-distilbert-base-tas-b",
# "deepset/roberta-large-squad2",
# "facebook/dpr-ctx_encoder-single-nq-base"
# ],
# index=0 # Default model
# )
# Define the extractive QA pipeline
qa_pipeline = pipeline(
    "question-answering",
    model="deepset/roberta-base-squad2",  # replace with your desired QA model
    token=HUGGINGFACE_API_KEY,  # 'use_auth_token' is deprecated in recent transformers
)
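# The QA pipeline returns a dict with 'answer', 'score', 'start', and 'end';
# only 'answer' is surfaced in the chat below.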
# Define a prompt template for the assistant
prompt_template = """
You are a helpful Assistant who answers users' questions based on PDF extracts.
Keep your answers detailed; if an answer is long, break it into bullet points.
Context information includes 'filename' and 'page'. Always reference these in your responses.
If the text is irrelevant or insufficient to answer, respond with "Not applicable."
The provided PDF content is:
{pdf_extract}
"""
# Cached function to create a vector database for the provided PDF files.
# cache_resource (rather than cache_data) is used because a FAISS store is
# not reliably picklable; the cache is keyed on the uploaded files, their
# names, and the embedding model name.
@st.cache_resource
def create_vectordb(files, filenames, huggingface_model_name):
    # Show a spinner while creating the vector database
    with st.spinner("Creating Vector Database..."):
        vectordb = get_index_for_pdf(
            [file.getvalue() for file in files], filenames, huggingface_model_name
        )
    return vectordb
# Upload PDF files using Streamlit file uploader
pdf_files = st.file_uploader("Upload your PDFs", type="pdf", accept_multiple_files=True)
# If PDF files are uploaded, create the vector database and store it in the session state
if pdf_files:
    pdf_file_names = [file.name for file in pdf_files]
    huggingface_model_name = "sentence-transformers/all-MiniLM-L6-v2"  # embedding model
    st.session_state["vectordb"] = create_vectordb(pdf_files, pdf_file_names, huggingface_model_name)
# Display previous chat messages
for message in st.session_state["prompt"]:
if message["role"] != "system":
with st.chat_message(message["role"]):
st.write(message["content"])
# Get the user's question using Streamlit chat input
question = st.chat_input("Ask anything")
# Handle the user's question
if question:
    vectordb = st.session_state.get("vectordb", None)
    if not vectordb:
        with st.chat_message("assistant"):
            st.write("You need to upload a PDF first.")
        st.stop()

    # Echo the user's question in the chat
    with st.chat_message("user"):
        st.write(question)

    # Search the vector database for content similar to the user's question
    search_results = vectordb.similarity_search(question, k=3)
    pdf_extract = "\n".join(
        [
            f"{result.page_content} (Filename: {result.metadata['filename']}, Page: {result.metadata['page']})"
            for result in search_results
        ]
    )

    # Run the extractive QA pipeline over the retrieved context
    response = qa_pipeline(question=question, context=pdf_extract)

    # Display the assistant's response
    with st.chat_message("assistant"):
        st.write(response["answer"])

    # Record the exchange in the session-state chat history
    st.session_state["prompt"].append({"role": "user", "content": question})
    st.session_state["prompt"].append({"role": "assistant", "content": response["answer"]})