# Web file-viewer residue from the page this file was copied from; not part of
# the program. It recorded: status "Sleeping", file size 4,373 bytes, a per-line
# commit-hash column (7c95914 / 403a475 / cab37f8) and a 1-121 line-number gutter.
import streamlit as st
import pickle
from PyPDF2 import PdfReader
from streamlit_extras.add_vertical_space import add_vertical_space
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks import get_openai_callback
import os
# Sidebar: branding, a short product blurb and the author credit.
# NOTE(review): the source lost its indentation; the sidebar block is assumed
# to end after the "Made with" line -- confirm against the deployed layout.
with st.sidebar:
    st.title(':orange_book: BinDoc GmbH')

    # Each entry is rendered by its own st.markdown call, in display order.
    _blurb_lines = (
        "Experience the future of document interaction with the revolutionary",
        "**BinDocs Chat App**.",
        "Harnessing the power of a Large Language Model and AI technology,",
        "this innovative platform redefines PDF engagement,",
        "enabling dynamic conversations that bridge the gap between",
        "human and machine intelligence.",
    )
    for _blurb_line in _blurb_lines:
        st.markdown(_blurb_line)

    add_vertical_space(3)  # visual gap between the blurb and the credit line
    st.write('Made with ❤️ by Anne')
# Ask for the OpenAI API key; type='password' masks the typed characters.
# NOTE(review): os.environ is shared by the whole Python process, so the key
# is visible to everything running in it -- confirm this is acceptable if the
# app serves more than one user.
api_key = st.text_input('Enter your OpenAI API Key:', type='password')
if not api_key:
    st.warning('API key is required to proceed.')
else:
    # Exported through the environment under the name OPENAI_API_KEY.
    os.environ['OPENAI_API_KEY'] = api_key
def load_pdf(file_path):
    """Return a FAISS vector store for a PDF, cached on disk by file name.

    The store is pickled to ``<name without extension>.pkl`` in the current
    working directory. When that file already exists it is loaded and
    returned without reading the PDF; otherwise the PDF text is extracted,
    split into overlapping chunks, embedded, indexed, and the result is
    written to the cache file.

    Args:
        file_path: An object accepted by ``PdfReader`` that also exposes a
            ``name`` attribute (the caller in this file passes the value
            returned by ``st.file_uploader``).

    Returns:
        The vector store, either unpickled from the cache or freshly built.

    Raises:
        ValueError: If no text could be extracted from the PDF, so there is
            nothing to index.
    """
    # basename() keeps the cache file in the working directory even if the
    # supplied name carries path components; splitext() strips the extension
    # without assuming it is exactly four characters long.
    store_name = os.path.splitext(os.path.basename(file_path.name))[0]
    cache_path = f"{store_name}.pkl"

    if os.path.exists(cache_path):
        # SECURITY NOTE: unpickling can execute arbitrary code. This is only
        # safe while the .pkl files are written exclusively by this function.
        # NOTE(review): the cache is keyed by file name only, so a different
        # PDF uploaded under the same name reuses the old index.
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    # Cache miss: only now pay for parsing and chunking the document.
    pdf_reader = PdfReader(file_path)
    # `or ""` guards against a page yielding no text object at all
    # (presumably possible for pages without a text layer -- confirm for the
    # installed PyPDF2 version).
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text=text)
    if not chunks:
        # Fail with a clear message instead of handing an empty list to the
        # index builder, and do not write a useless cache file.
        raise ValueError(
            f"No extractable text found in PDF {file_path.name!r}."
        )

    embeddings = OpenAIEmbeddings()
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    with open(cache_path, "wb") as f:
        pickle.dump(vector_store, f)
    return vector_store
def load_chatbot():
    """Build and return a question-answering chain of type "stuff".

    The chain is created with ``load_qa_chain`` around a default-configured
    ``OpenAI`` LLM instance.
    """
    llm = OpenAI()
    qa_chain = load_qa_chain(llm=llm, chain_type="stuff")
    return qa_chain
def main():
    """Render the chat page: PDF upload, chat history and the question box.

    When a PDF is uploaded and the user presses "Ask" with a non-blank
    question, the question is recorded in ``st.session_state['chat_history']``,
    the three most similar chunks are retrieved from the PDF's vector store,
    the QA chain is run on them, and the answer is shown and recorded in the
    history as well.

    History entries are ``(sender, message, status)`` tuples where ``sender``
    is ``"User"`` or ``"Bot"`` and ``status`` is ``"new"`` or ``"old"``.
    """
    import html  # local import: only needed to escape the model's answer

    st.title("BinDocs Chat App")
    pdf = st.file_uploader("Upload your PDF", type="pdf")

    # Initialise per-session state on first use.
    if "chat_history" not in st.session_state:
        st.session_state['chat_history'] = []
    if "current_input" not in st.session_state:
        st.session_state['current_input'] = ""

    display_chat_history(st.session_state['chat_history'])

    st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
    st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
    st.write("<!-- End Spacer -->", unsafe_allow_html=True)

    if pdf is None:
        return

    query = st.text_input(
        "Ask questions about your PDF file (in any preferred language):",
        value=st.session_state['current_input'],
    )
    if not st.button("Ask"):
        return
    if not query.strip():
        # Do not spend an embedding/LLM call on a blank question.
        st.warning("Please enter a question before pressing Ask.")
        return

    st.session_state['current_input'] = query
    st.session_state['chat_history'].append(("User", query, "new"))

    loading_message = st.empty()
    loading_message.text('Bot is thinking...')
    try:
        vector_store = load_pdf(pdf)
        chain = load_chatbot()
        docs = vector_store.similarity_search(query=query, k=3)
        with get_openai_callback():
            response = chain.run(input_documents=docs, question=query)
    finally:
        # Clear the placeholder even if retrieval or the model call fails.
        loading_message.empty()

    # Show the answer right away. The text is escaped because it is placed
    # inside raw HTML and comes from the model, not from this program.
    st.write(
        f"<div id='response' style='background-color: #caf; padding: 10px; border-radius: 10px; margin: 10px;'>Bot: {html.escape(response)}</div>",
        unsafe_allow_html=True,
    )
    # NOTE(review): <script> tags passed through st.write may not be executed
    # by Streamlit -- confirm that this scroll actually happens.
    st.write("<script>document.getElementById('response').scrollIntoView();</script>", unsafe_allow_html=True)

    # Record the answer so it is still shown after the next rerun; without
    # this the history only ever contains the user's questions.
    st.session_state['chat_history'].append(("Bot", response, "new"))

    # Mark all messages as old after displaying.
    st.session_state['chat_history'] = [
        (sender, msg, "old")
        for sender, msg, _ in st.session_state['chat_history']
    ]
def display_chat_history(chat_history):
    """Render each chat entry as a coloured, rounded HTML box.

    Args:
        chat_history: Iterable of ``(sender, message, status)`` tuples.
            Entries whose status is ``"new"`` are drawn in ``#FFA07A``;
            otherwise ``"User"`` entries use ``#acf`` and every other sender
            uses ``#caf``.
    """
    import html  # local import: used only to escape text placed inside HTML

    for sender, message, status in chat_history:
        if status == "new":
            background_color = "#FFA07A"
        elif sender == "User":
            background_color = "#acf"
        else:
            background_color = "#caf"
        # Sender and message are free text (typed by the user or produced by
        # the model), so they are escaped before being embedded in raw HTML.
        st.markdown(
            f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{html.escape(str(sender))}: {html.escape(str(message))}</div>",
            unsafe_allow_html=True,
        )
# Script entry point: build the page when the file is run directly.
if __name__ == "__main__":
    main()