# BinDocs Chat App — Streamlit front-end (hosted as a Hugging Face Space).
import streamlit as st | |
from dotenv import load_dotenv | |
import pickle | |
from huggingface_hub import Repository | |
from PyPDF2 import PdfReader | |
from streamlit_extras.add_vertical_space import add_vertical_space | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.vectorstores import FAISS | |
from langchain.llms import OpenAI | |
from langchain.chains.question_answering import load_qa_chain | |
from langchain.callbacks import get_openai_callback | |
import os | |
# Step 1: Clone the Dataset Repository
# Module-level side effect: clones (or reuses) a private HF dataset repo on
# import; requires network access and the HUB_TOKEN environment variable.
repo = Repository(
    local_dir="Private_Book",  # Local directory to clone the repository into
    repo_type="dataset",  # This is a dataset repository, not a model repo
    clone_from="Anne31415/Private_Book",  # Source repository URL
    token=os.environ["HUB_TOKEN"]  # Auth token; raises KeyError if the env var is unset
)
repo.git_pull()  # Pull the latest changes (if any)

# Step 2: Load the PDF File
pdf_file_path = "Private_Book/Glossar_HELP_DESK_combi.pdf"  # Path inside the cloned repo
# Sidebar contents — static marketing copy rendered once per script run.
with st.sidebar:
    st.title(':orange[BinDoc GmbH]')
    # Intro text, split across several markdown calls so each line renders
    # as its own block.
    st.markdown(
        "Experience the future of document interaction with the revolutionary"
    )
    st.markdown("**BinDocs Chat App**.")
    st.markdown("Harnessing the power of a Large Language Model and AI technology,")
    st.markdown("this innovative platform redefines PDF engagement,")
    st.markdown("enabling dynamic conversations that bridge the gap between")
    st.markdown("human and machine intelligence.")
    add_vertical_space(3)  # Add more vertical space between text blocks
    st.write('Made with ❤️ by BinDoc GmbH')

# Read the OpenAI API key from the environment (populated by load_dotenv or
# platform secrets). NOTE: this uses os.getenv, not st.secrets.
api_key = os.getenv("OPENAI_API_KEY")
def load_pdf(file_path):
    """Extract text from a PDF, chunk it, and return a FAISS vector store.

    The embedded store is cached on disk as ``<basename>.pkl`` in the working
    directory; subsequent runs load the pickle instead of re-embedding,
    avoiding repeated OpenAI API calls.

    Args:
        file_path: Path to the PDF file to index.

    Returns:
        A FAISS vector store built over the PDF's text chunks.
    """
    pdf_reader = PdfReader(file_path)
    text = ""
    for page in pdf_reader.pages:
        # BUG FIX: extract_text() returns None for pages without a text
        # layer (e.g. scanned images); guard so concatenation never raises.
        text += page.extract_text() or ""

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_text(text=text)

    store_name, _ = os.path.splitext(os.path.basename(file_path))
    cache_path = f"{store_name}.pkl"

    if os.path.exists(cache_path):
        # NOTE(review): unpickling is only safe because this cache file is
        # produced locally by this app — never load pickles from untrusted
        # sources.
        with open(cache_path, "rb") as f:
            VectorStore = pickle.load(f)
    else:
        embeddings = OpenAIEmbeddings()
        VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
        with open(cache_path, "wb") as f:
            pickle.dump(VectorStore, f)

    return VectorStore
def load_chatbot():
    """Build the question-answering chain used to answer user queries."""
    llm = OpenAI()
    return load_qa_chain(llm=llm, chain_type="stuff")
def main():
    """Render the chat UI and route user questions through the QA chain.

    Streamlit re-runs this function top-to-bottom on every interaction;
    conversation state persists across reruns in ``st.session_state``.
    """
    st.title("BinDocs Chat App")

    st.markdown(
        """🤖 Welcome to BinDocs ChatBot! 🤖

Hello! I’m your friendly assistant, designed to help you navigate through our platform with ease. Here's a snapshot of what I can assist you with:

📘 **Glossary Inquiries:**
Having trouble understanding specific terms? Ask me! For instance, if you are unsure about what "Belegarzt" means, just type in “What is a Belegarzt?” and I will provide you with a detailed explanation based on our glossary.

🆘 **Help Page Navigation:**
I can guide you through our help page and answer your queries regarding any problems or inquiries you might have, such as “Forgot your Password?” or other platform-related concerns.

#### How to Interact:
Simply type in your question or concern, and I will do my best to assist you. Examples are shown at the bottom of this page. Try some out!"""
    )

    # Directly specifying the path to the PDF file (module-level constant).
    pdf_path = pdf_file_path
    if not os.path.exists(pdf_path):
        st.error("File not found. Please check the file path.")
        return

    if "chat_history" not in st.session_state:
        st.session_state['chat_history'] = []

    display_chat_history(st.session_state['chat_history'])

    # Flex spacer so new messages render at the bottom of the page.
    st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
    st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
    st.write("<!-- End Spacer -->", unsafe_allow_html=True)

    new_messages_placeholder = st.empty()

    query = st.text_input("Ask questions about your PDF file (in any preferred language):")

    # Canned example questions; clicking a button overrides the typed query.
    if st.button("Was genau ist ein Belegarzt?"):
        query = "Was genau ist ein Belegarzt?"
    if st.button("Wofür wird die Alpha-ID verwendet?"):
        query = "Wofür wird die Alpha-ID verwendet?"
    if st.button("Was sind die Vorteile des ambulanten operierens?"):
        query = "Was sind die Vorteile des ambulanten operierens?"
    if st.button("Was kann ich mit dem Prognose-Analyse Toll machen?"):
        query = "Was kann ich mit dem Prognose-Analyse Toll machen?"
    if st.button("Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"):
        query = "Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"

    # Render the button unconditionally so it always appears in the UI.
    ask_clicked = st.button("Ask")

    # BUG FIX: previously an empty query could be submitted (click "Ask"
    # with no text), and the duplicate check compared the new query against
    # chat_history[-1][1] — the *Bot's* last response, not the user's last
    # question — so the same query was silently re-run on every rerun.
    last_user_query = next(
        (msg for sender, msg, _ in reversed(st.session_state['chat_history'])
         if sender == "User"),
        None,
    )
    if query and (ask_clicked or query != last_user_query):
        st.session_state['chat_history'].append(("User", query, "new"))

        loading_message = st.empty()
        loading_message.text('Bot is thinking...')

        VectorStore = load_pdf(pdf_path)
        chain = load_chatbot()
        docs = VectorStore.similarity_search(query=query, k=3)
        with get_openai_callback() as cb:
            response = chain.run(input_documents=docs, question=query)

        st.session_state['chat_history'].append(("Bot", response, "new"))

        # Display the new user/bot message pair at the bottom.
        new_messages = st.session_state['chat_history'][-2:]
        for sender, msg, status in new_messages:
            background_color = "#FFA07A" if status == "new" else "#acf" if sender == "User" else "#caf"
            new_messages_placeholder.markdown(
                f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{sender}: {msg}</div>",
                unsafe_allow_html=True,
            )

        # Scroll to the latest response using JavaScript
        st.write("<script>document.getElementById('response').scrollIntoView();</script>", unsafe_allow_html=True)

        loading_message.empty()

    # Mark all messages as old after displaying
    st.session_state['chat_history'] = [
        (sender, msg, "old") for sender, msg, _ in st.session_state['chat_history']
    ]
def display_chat_history(chat_history):
    """Render every (sender, message, status) entry as a colored chat bubble.

    "new" messages get a highlight color; otherwise User and Bot messages
    each get their own background.
    """
    for sender, message, status in chat_history:
        if status == "new":
            bubble_color = "#FFA07A"
        elif sender == "User":
            bubble_color = "#acf"
        else:
            bubble_color = "#caf"
        st.markdown(
            f"<div style='background-color: {bubble_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{sender}: {message}</div>",
            unsafe_allow_html=True,
        )
# Entry point when executed via `streamlit run <file>` (or `python <file>`).
if __name__ == "__main__":
    main()