Spaces:
Sleeping
Sleeping
File size: 4,838 Bytes
7c95914 403a475 7c95914 cab37f8 7c95914 403a475 7c95914 cab37f8 7c95914 f9dbffb 7c95914 f9dbffb 7c95914 403a475 f9dbffb 7c95914 cab37f8 7c95914 cab37f8 7c95914 03e4a8d 586a969 7c95914 586a969 7c95914 586a969 7c95914 586a969 7c95914 586a969 7c95914 586a969 7c95914 586a969 7c95914 03e4a8d 403a475 7c95914 403a475 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import streamlit as st
import pickle
from PyPDF2 import PdfReader
from streamlit_extras.add_vertical_space import add_vertical_space
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks import get_openai_callback
import os
# Sidebar contents: branding followed by the product pitch, emitted as one
# markdown element per sentence fragment.
with st.sidebar:
    st.title(':orange_book: BinDoc GmbH')
    for _pitch_line in (
        "Experience the future of document interaction with the revolutionary",
        "**BinDocs Chat App**.",
        "Harnessing the power of a Large Language Model and AI technology,",
        "this innovative platform redefines PDF engagement,",
        "enabling dynamic conversations that bridge the gap between",
        "human and machine intelligence.",
    ):
        st.markdown(_pitch_line)
    add_vertical_space(3)  # Add more vertical space between text blocks
    st.write('Made with ❤️ by Anne')

# API key input (type='password' keeps the entered text from being displayed).
# NOTE(review): the indentation of this file was lost, so it is unclear whether
# this prompt originally lived inside the sidebar; it is kept at top level
# here -- confirm against the deployed app.
api_key = st.text_input('Enter your OpenAI API Key:', type='password')
if not api_key:
    st.warning('API key is required to proceed.')
else:
    # Exported through the environment; OpenAIEmbeddings() and OpenAI() below
    # are constructed without an explicit key argument.
    os.environ['OPENAI_API_KEY'] = api_key
def load_pdf(file_path):
    """Return a FAISS vector store for a PDF, building and caching it if needed.

    The store is cached as ``<name>.pkl`` in the current working directory,
    where ``<name>`` is the file's name without directory or extension. When
    that cache file exists it is loaded and returned without re-reading the
    PDF; otherwise the PDF text is extracted, split into overlapping chunks,
    embedded with ``OpenAIEmbeddings`` and the resulting store is pickled.

    Args:
        file_path: Object accepted by ``PdfReader`` that also exposes a
            ``name`` attribute (``main`` passes the ``st.file_uploader``
            result).

    Returns:
        The loaded or newly built vector store.

    Raises:
        ValueError: If no text could be extracted from the PDF, so there is
            nothing to index.
    """
    # splitext() drops whatever extension is present instead of assuming it is
    # exactly four characters; basename() keeps a crafted upload name from
    # steering the cache file into another directory.
    store_name = os.path.splitext(os.path.basename(file_path.name))[0]
    cache_path = f"{store_name}.pkl"

    # Check the cache first so a hit does not pay for parsing and chunking.
    # NOTE(review): the cache is keyed by file name only, so two different
    # PDFs sharing a name will share one store.
    if os.path.exists(cache_path):
        # SECURITY: unpickling runs arbitrary code from the file. This is only
        # acceptable while the working directory is writable by trusted
        # parties alone.
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    pdf_reader = PdfReader(file_path)
    # `or ""` guards against a page yielding None/empty text (e.g. image-only
    # pages); join avoids repeated string concatenation.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text=text)
    if not chunks:
        # Fail early with a clear message rather than handing an empty list
        # to the vector store.
        raise ValueError(
            f"No extractable text found in '{file_path.name}'; cannot build a vector store."
        )

    embeddings = OpenAIEmbeddings()  # No api_key parameter here
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    with open(cache_path, "wb") as f:
        pickle.dump(vector_store, f)
    return vector_store
def load_chatbot():
    """Create the question-answering chain used to answer user queries.

    Returns:
        The chain built by ``load_qa_chain`` around a default-configured
        ``OpenAI`` LLM with ``chain_type="stuff"``.
    """
    llm = OpenAI()
    qa_chain = load_qa_chain(llm=llm, chain_type="stuff")
    return qa_chain
def main():
    """Render the chat page: PDF upload, chat history, and question handling.

    Flow:
      1. Show the title and the PDF uploader, and make sure the session-state
         keys ``chat_history`` and ``current_input`` exist.
      2. Render the stored chat history, followed by a spacer element.
      3. If a PDF is uploaded, show the question box and the "Ask" button.
      4. When a non-empty question is submitted (button click, or the text
         differs from ``last_input``), run a similarity search over the PDF's
         vector store, ask the QA chain, append both messages to the history,
         render them, and finally mark every stored message as "old".
    """
    st.title("BinDocs Chat App")
    pdf = st.file_uploader("Upload your PDF", type="pdf")

    if "chat_history" not in st.session_state:
        st.session_state['chat_history'] = []
    if "current_input" not in st.session_state:
        st.session_state['current_input'] = ""

    display_chat_history(st.session_state['chat_history'])

    st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
    st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
    st.write("<!-- End Spacer -->", unsafe_allow_html=True)

    if pdf is None:
        return

    query = st.text_input("Ask questions about your PDF file (in any preferred language):")
    # Evaluate the button unconditionally so it is always rendered.
    ask_clicked = st.button("Ask")
    is_new_query = query != st.session_state.get('last_input', '')
    # An empty question is never sent, even when the button is clicked.
    if not query or not (ask_clicked or is_new_query):
        return

    st.session_state['last_input'] = query  # Save the current query as the last input
    st.session_state['chat_history'].append(("User", query, "new"))

    loading_message = st.empty()
    loading_message.text('Bot is thinking...')
    try:
        vector_store = load_pdf(pdf)
        chain = load_chatbot()
        docs = vector_store.similarity_search(query=query, k=3)
        with get_openai_callback():
            response = chain.run(input_documents=docs, question=query)
    finally:
        # Remove the status text even if loading or answering raised.
        loading_message.empty()

    st.session_state['chat_history'].append(("Bot", response, "new"))

    # Display the new question/answer pair at the bottom, reusing the history
    # renderer so both places format messages the same way. (The previous code
    # wrote to an undefined `new_messages_placeholder`.)
    display_chat_history(st.session_state['chat_history'][-2:])

    # Scroll to the latest response using JavaScript.
    # NOTE(review): nothing in this file creates an element with id
    # 'response' -- confirm whether this snippet has any effect.
    st.write("<script>document.getElementById('response').scrollIntoView();</script>", unsafe_allow_html=True)

    # The former `query = ""` was dropped: rebinding the local name does not
    # change the text-input widget.

    # Mark all messages as old after displaying
    st.session_state['chat_history'] = [
        (sender, msg, "old") for sender, msg, _ in st.session_state['chat_history']
    ]
def display_chat_history(chat_history):
    """Render each chat message as a coloured, rounded block via ``st.markdown``.

    Args:
        chat_history: Iterable of ``(sender, message, status)`` tuples. A
            message whose status is ``"new"`` gets the highlight colour;
            otherwise the colour depends on whether the sender is ``"User"``.

    Sender and message are HTML-escaped before interpolation because the
    markup is rendered with ``unsafe_allow_html=True`` and the text comes
    from the user and from the language model.
    """
    import html  # local import: the file's import header does not provide it

    for sender, message, status in chat_history:
        if status == "new":
            background_color = "#FFA07A"
        elif sender == "User":
            background_color = "#acf"
        else:
            background_color = "#caf"
        safe_sender = html.escape(str(sender))
        safe_message = html.escape(str(message))
        st.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{safe_sender}: {safe_message}</div>", unsafe_allow_html=True)
# Script entry point: build the page when the file is run directly.
if __name__ == "__main__":
    main()