|
import streamlit as st |
|
from PIL import Image |
|
import time |
|
import streamlit_analytics |
|
from dotenv import load_dotenv |
|
import pickle |
|
from huggingface_hub import Repository |
|
from PyPDF2 import PdfReader |
|
from streamlit_extras.add_vertical_space import add_vertical_space |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.vectorstores import FAISS |
|
from langchain.llms import OpenAI |
|
from langchain.chains.question_answering import load_qa_chain |
|
from langchain.callbacks import get_openai_callback |
|
import os |
|
|
|
import pandas as pd |
|
import pydeck as pdk |
|
from urllib.error import URLError |
|
|
|
|
|
# Each page keeps its own chat transcript in Streamlit's session state.
# Create the empty lists only when the keys are missing so that existing
# transcripts are not overwritten when the script runs again.
for history_key in ('chat_history_page1', 'chat_history_page2'):
    if history_key not in st.session_state:
        st.session_state[history_key] = []
|
|
|
|
|
|
|
|
|
# PDFs backing the two chatbot pages; both paths point into the local
# "Private_Book" directory used by the repository object below.
pdf_path = "Private_Book/141123_Kombi.pdf"
pdf_path2 = "Private_Book/Deutsche_Kodierrichtlinien_23.pdf"

# Dataset repository holding the PDFs. The token is read with
# os.environ[...], so a missing HUB_TOKEN raises KeyError right here.
repo = Repository(
    local_dir="Private_Book",
    repo_type="dataset",
    clone_from="Anne31415/Private_Book",
    token=os.environ["HUB_TOKEN"],
)
# Bring the local checkout up to date with the remote.
repo.git_pull()

# OpenAI key as found in the environment; os.getenv yields None when unset.
api_key = os.getenv("OPENAI_API_KEY")
|
|
|
|
|
|
|
|
|
|
|
@st.cache_data(persist="disk")
def load_vector_store(file_path, store_name, force_reload=False):
    """Return the FAISS vector store for a PDF, building it when needed.

    Args:
        file_path: Path of the PDF whose text is indexed.
        store_name: Base name of the pickle file (``<store_name>.pkl``) used
            to persist the store in the current working directory.
        force_reload: When true, rebuild the store even if the pickle file
            already exists.

    Returns:
        The vector store, either unpickled from disk or freshly built from
        the PDF text (in which case it is also written to the pickle file).
    """
    pickle_path = f"{store_name}.pkl"

    # Fast path: reuse the store persisted by an earlier run.
    # NOTE(review): pickle.load executes arbitrary code from the file; this
    # is only safe as long as the .pkl is always produced by this app.
    if not force_reload and os.path.exists(pickle_path):
        with open(pickle_path, "rb") as cached_file:
            return pickle.load(cached_file)

    # Slow path: split the PDF text into overlapping chunks, embed them and
    # persist the resulting store for later runs.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    pdf_text = load_pdf_text(file_path)
    text_chunks = splitter.split_text(text=pdf_text)

    vector_store = FAISS.from_texts(text_chunks, embedding=OpenAIEmbeddings())
    with open(pickle_path, "wb") as out_file:
        pickle.dump(vector_store, out_file)

    return vector_store
|
|
|
|
|
def load_pdf_text(file_path):
    """Return the text of every page of the PDF at *file_path*, concatenated.

    Pages for which ``extract_text()`` returns a falsy value contribute an
    empty string, so the result is always a ``str``.
    """
    reader = PdfReader(file_path)
    return "".join(page.extract_text() or "" for page in reader.pages)
|
|
|
def load_chatbot():
    """Build and return a "stuff"-type question-answering chain.

    The chain is created with ``load_qa_chain`` on top of an ``OpenAI`` LLM
    configured for the ``gpt-3.5-turbo-instruct`` model.
    """
    llm = OpenAI(model_name="gpt-3.5-turbo-instruct")
    return load_qa_chain(llm=llm, chain_type="stuff")
|
|
|
|
|
def display_chat_history(chat_history):
    """Render each chat entry as a coloured HTML bubble via ``st.markdown``.

    Args:
        chat_history: Iterable of indexable entries where index 0 is the
            sender label and index 1 is the message text. Any further
            elements (e.g. a "new"/"old" marker) are ignored.

    The sender and message are HTML-escaped before being interpolated,
    because the bubble is rendered with ``unsafe_allow_html=True`` and the
    text originates from user input and model output.
    """
    import html  # function-scope import keeps this block self-contained

    # The original three-way conditional produced this colour in every
    # branch, so it is a plain constant.
    background_color = "#ffeecf"

    for chat in chat_history:
        sender = html.escape(str(chat[0]))
        message = html.escape(str(chat[1]))
        st.markdown(
            f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{sender}: {message}</div>",
            unsafe_allow_html=True,
        )
|
|
|
|
|
def handle_no_answer(response):
    """Replace "I don't know"-style answers with a friendly fallback text.

    Args:
        response: The answer text produced by the QA chain.

    Returns:
        A fixed German fallback message when *response* contains one of the
        known "no answer" phrases (matched case-insensitively), otherwise
        *response* unchanged. Empty or ``None`` input is returned as is.
    """
    # Every entry needs its own trailing comma: adjacent string literals
    # without one are silently concatenated into a single phrase.
    no_answer_phrases = (
        "ich weiß es nicht",
        "ich weis es nicht",
        "ich bin mir nicht sicher",
        "es wird nicht erwähnt",
        "keine Information",
        "das ist unklar",
        "da habe ich keine Antwort",
        "das kann ich nicht beantworten",
        "I do not have enough context",
        "I don't know",
        "I am not sure",
        "It is not mentioned",
        "No information",
        "That's unclear",
        "I have no answer to that",
    )
    fallback = "Hmm, das ist eine knifflige Frage. Ich habe vielleicht nicht auf alles eine Antwort, aber lass uns das gemeinsam erkunden. Kannst du mir mehr Details geben oder eine andere Frage stellen?"

    if not response:
        return response

    # Case-fold BOTH sides: the phrase list contains capitals, so comparing
    # it against a lower-cased response alone would never match them.
    # casefold() also maps "ß" to "ss", covering both spellings.
    normalized = response.casefold()
    if any(phrase.casefold() in normalized for phrase in no_answer_phrases):
        return fallback
    return response
|
|
|
|
|
|
|
def page1():
    """Render the "Welcome to BinDocs AI!" page: a chatbot over ``pdf_path``.

    Shows the title and logo, the stored transcript from
    ``st.session_state['chat_history_page1']``, a text input and six
    predefined question buttons. When a question is present it is answered
    from the page's vector store, appended to the transcript together with
    the answer, and both messages are shown above the input.

    Any exception raised while rendering is reported through ``st.error``
    instead of propagating.
    """
    history_key = 'chat_history_page1'
    # Predefined questions, one tuple per button column (top to bottom).
    left_questions = (
        "Was kann ich mit dem Prognose-Analyse-Tool machen?",
        "Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?",
        "Ich habe mein Meta-Password vergessen, wie kann ich es zurücksetzen?",
    )
    right_questions = (
        "Dies ist eine reine Test Frage, welche aber eine ausreichende Länge hat.",
        "Was sagt mir denn generell die wundervolle Bevölkerungsentwicklung?",
        "Ob ich hier wohl viel schreibe, dass die Fragen vom Layout her passen?",
    )

    try:
        # Hide Streamlit's default menu and footer.
        hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
        st.markdown(hide_streamlit_style, unsafe_allow_html=True)

        # Header row: wide title column on the left, logo on the right.
        title_col, logo_col = st.columns([3, 1])
        with title_col:
            st.title("Welcome to BinDocs AI!")
        with logo_col:
            image = Image.open('BinDoc Logo (Quadratisch).png')
            st.image(image, use_column_width='always')

        with streamlit_analytics.track():
            if not os.path.exists(pdf_path):
                st.error("File not found. Please check the file path.")
                return

            VectorStore = load_vector_store(pdf_path, "vector_store_page1", force_reload=False)

            display_chat_history(st.session_state[history_key])

            st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
            st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
            st.write("<!-- End Spacer -->", unsafe_allow_html=True)

            # Reserved slot above the input where the newest exchange is
            # rendered once the answer is available.
            new_messages_placeholder = st.empty()

            query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")

            add_vertical_space(2)

            # A clicked button overrides whatever is in the text input.
            left_col, right_col = st.columns(2)
            for column, questions in ((left_col, left_questions), (right_col, right_questions)):
                with column:
                    for question in questions:
                        if st.button(question):
                            query = question

            if query:
                st.session_state[history_key].append(("User", query, "new"))

                start_time = time.time()
                with st.spinner('Bot is thinking...'):
                    chain = load_chatbot()
                    docs = VectorStore.similarity_search(query=query, k=3)
                    with get_openai_callback():
                        response = chain.run(input_documents=docs, question=query)
                    response = handle_no_answer(response)
                duration = time.time() - start_time

                st.text(f"Response time: {duration:.2f} seconds")

                st.session_state[history_key].append(("Bot", response, "new"))

                # st.empty() holds a single element: writing the two
                # messages to it one after another would leave only the
                # last one visible. Render both inside a child container.
                with new_messages_placeholder.container():
                    display_chat_history(st.session_state[history_key][-2:])

            # Everything has been displayed now; flag all entries as old.
            st.session_state[history_key] = [
                (sender, msg, "old") for sender, msg, _ in st.session_state[history_key]
            ]

    except Exception as e:
        st.error(f"Upsi, an unexpected error occurred: {e}")
|
|
|
|
|
|
|
|
|
|
|
def page2():
    """Render the "Kodieren statt Frustrieren!" page: a chatbot over ``pdf_path2``.

    Shows the title and logo, the stored transcript from
    ``st.session_state['chat_history_page2']``, a text input and six
    predefined question buttons. When a question is present it is answered
    from the page's vector store, appended to the transcript together with
    the answer, and both messages are shown above the input.

    Any exception raised while rendering is reported through ``st.error``
    instead of propagating.
    """
    history_key = 'chat_history_page2'
    # Predefined questions, one tuple per button column (top to bottom).
    left_questions = (
        "Wann kodiere ich etwas als Hauptdiagnose und wann als Nebendiagnose?",
        "Ein Patient wird mit Aszites bei bekannter Leberzirrhose stationär aufgenommen. Es wird nur der Aszites durch eine Punktion behandelt.Wie kodiere ich das?",
        "Hauptdiagnose: Hirntumor wie kodiere ich das?",
    )
    right_questions = (
        "Welche Prozeduren werden normalerweise nicht verschlüsselt?",
        "Was muss ich bei der Kodierung der Folgezusänden von Krankheiten beachten?",
        "Was mache ich bei einer Verdachtsdiagnose, wenn mein Patien nach Hause entlassen wird?",
    )

    try:
        # Hide Streamlit's default menu and footer.
        hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
        st.markdown(hide_streamlit_style, unsafe_allow_html=True)

        # Header row: wide title column on the left, logo on the right.
        title_col, logo_col = st.columns([3, 1])
        with title_col:
            st.title("Kodieren statt Frustrieren!")
        with logo_col:
            image = Image.open('BinDoc Logo (Quadratisch).png')
            st.image(image, use_column_width='always')

        with streamlit_analytics.track():
            if not os.path.exists(pdf_path2):
                st.error("File not found. Please check the file path.")
                return

            VectorStore = load_vector_store(pdf_path2, "vector_store_page2", force_reload=False)

            display_chat_history(st.session_state[history_key])

            st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
            st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
            st.write("<!-- End Spacer -->", unsafe_allow_html=True)

            # Reserved slot above the input where the newest exchange is
            # rendered once the answer is available.
            new_messages_placeholder = st.empty()

            query = st.text_input("Ask questions about your PDF file (in any preferred language):")

            add_vertical_space(2)

            # A clicked button overrides whatever is in the text input.
            left_col, right_col = st.columns(2)
            for column, questions in ((left_col, left_questions), (right_col, right_questions)):
                with column:
                    for question in questions:
                        if st.button(question):
                            query = question

            if query:
                st.session_state[history_key].append(("User", query, "new"))

                start_time = time.time()
                with st.spinner('Bot is thinking...'):
                    chain = load_chatbot()
                    docs = VectorStore.similarity_search(query=query, k=3)
                    with get_openai_callback():
                        response = chain.run(input_documents=docs, question=query)
                    response = handle_no_answer(response)
                duration = time.time() - start_time

                st.text(f"Response time: {duration:.2f} seconds")

                st.session_state[history_key].append(("Bot", response, "new"))

                # st.empty() holds a single element: writing the two
                # messages to it one after another would leave only the
                # last one visible. Render both inside a child container.
                with new_messages_placeholder.container():
                    display_chat_history(st.session_state[history_key][-2:])

            # Everything has been displayed now; flag all entries as old.
            st.session_state[history_key] = [
                (sender, msg, "old") for sender, msg, _ in st.session_state[history_key]
            ]

    except Exception as e:
        st.error(f"Upsi, an unexpected error occurred: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Draw the sidebar and render the page selected in its select box."""
    # Select-box label -> function that renders the corresponding page.
    page_renderers = {
        "Document Analysis Bot": page1,
        "Coding Assistance Bot": page2,
    }

    with st.sidebar:
        st.title('BinDoc GmbH')
        st.markdown("Experience revolutionary interaction with BinDocs Chat App, leveraging state-of-the-art AI technology.")
        add_vertical_space(1)
        page = st.sidebar.selectbox("Choose a page", list(page_renderers))
        add_vertical_space(1)
        st.write('Made with ❤️ by BinDoc GmbH')

    # Unknown selections render nothing, like the original if/elif chain.
    render_page = page_renderers.get(page)
    if render_page is not None:
        render_page()


if __name__ == "__main__":
    main()