Anne31415 committed on
Commit
3e6af9f
·
1 Parent(s): 1e296b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +185 -64
app.py CHANGED
@@ -15,11 +15,18 @@ from langchain.chains.question_answering import load_qa_chain
15
  from langchain.callbacks import get_openai_callback
16
  import os
17
 
18
- #st.set_page_config(layout="wide")
 
 
 
 
 
 
 
 
 
19
 
20
 
21
- # Set the page config to make the sidebar start in the collapsed state
22
- st.set_page_config(initial_sidebar_state="collapsed")
23
 
24
  # Step 1: Clone the Dataset Repository
25
  repo = Repository(
@@ -33,54 +40,39 @@ repo.git_pull() # Pull the latest changes (if any)
33
  # Step 2: Load the PDF File
34
  pdf_path = "Private_Book/141123_Kombi_compressed.pdf" # Replace with your PDF file path
35
 
36
- with st.sidebar:
37
- st.title('BinDoc GmbH')
38
- st.markdown("Experience revolutionary interaction with BinDocs Chat App, leveraging state-of-the-art AI technology.")
39
-
40
- add_vertical_space(1) # Adjust as per the desired spacing
41
-
42
- st.markdown("""
43
- Hello! I’m here to assist you with:<br><br>
44
- 📘 **Glossary Inquiries:**<br>
45
- I can clarify terms like "DiGA", "AOP", or "BfArM", providing clear and concise explanations to help you understand our content better.<br><br>
46
- 🆘 **Help Page Navigation:**<br>
47
- Ask me if you forgot your password or want to know more about topics related to the platform.<br><br>
48
- 📰 **Latest Whitepapers Insights:**<br>
49
- Curious about our recent publications? Feel free to ask about our latest whitepapers!<br><br>
50
- """, unsafe_allow_html=True)
51
-
52
- add_vertical_space(1) # Adjust as per the desired spacing
53
 
54
- st.write('Made with ❤️ by BinDoc GmbH')
55
 
56
- api_key = os.getenv("OPENAI_API_KEY")
57
- # Retrieve the API key from st.secrets
58
 
59
- # Updated caching mechanism using st.cache_data
60
- @st.cache_data(persist="disk") # Using persist="disk" to save cache across sessions
61
 
62
 
 
 
63
  def load_vector_store(file_path, store_name, force_reload=False):
64
- # Check if we need to force reload the vector store (e.g., when the PDF changes)
65
- if force_reload or not os.path.exists(f"{store_name}.pkl"):
66
- text_splitter = RecursiveCharacterTextSplitter(
67
- chunk_size=1000,
68
- chunk_overlap=200,
69
- length_function=len
70
- )
71
-
72
- text = load_pdf_text(file_path)
73
- chunks = text_splitter.split_text(text=text)
74
-
75
- embeddings = OpenAIEmbeddings()
76
- VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
77
- with open(f"{store_name}.pkl", "wb") as f:
78
- pickle.dump(VectorStore, f)
79
- else:
80
- with open(f"{store_name}.pkl", "rb") as f:
81
- VectorStore = pickle.load(f)
82
 
83
- return VectorStore
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
  # Utility function to load text from a PDF
86
  def load_pdf_text(file_path):
@@ -93,7 +85,16 @@ def load_pdf_text(file_path):
93
  def load_chatbot():
94
  return load_qa_chain(llm=OpenAI(), chain_type="stuff")
95
 
96
- def main():
 
 
 
 
 
 
 
 
 
97
  try:
98
  hide_streamlit_style = """
99
  <style>
@@ -114,22 +115,16 @@ def main():
114
  image = Image.open('BinDoc Logo (Quadratisch).png')
115
  st.image(image, use_column_width='always')
116
 
117
-
118
-
119
-
120
  # Start tracking user interactions
121
  with streamlit_analytics.track():
122
  if not os.path.exists(pdf_path):
123
  st.error("File not found. Please check the file path.")
124
  return
125
 
126
- VectorStore = load_vector_store(pdf_path, "my_vector_store", force_reload=False)
127
-
128
-
129
- if "chat_history" not in st.session_state:
130
- st.session_state['chat_history'] = []
131
-
132
- display_chat_history(st.session_state['chat_history'])
133
 
134
  st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
135
  st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
@@ -163,7 +158,7 @@ def main():
163
 
164
 
165
  if query:
166
- st.session_state['chat_history'].append(("User", query, "new"))
167
 
168
  # Start timing
169
  start_time = time.time()
@@ -185,11 +180,11 @@ def main():
185
  # You can use Streamlit's text function to display the timing
186
  st.text(f"Response time: {duration:.2f} seconds")
187
 
188
- st.session_state['chat_history'].append(("Bot", response, "new"))
189
 
190
 
191
  # Display new messages at the bottom
192
- new_messages = st.session_state['chat_history'][-2:]
193
  for chat in new_messages:
194
  background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
195
  new_messages_placeholder.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
@@ -199,18 +194,144 @@ def main():
199
  query = ""
200
 
201
  # Mark all messages as old after displaying
202
- st.session_state['chat_history'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history']]
203
 
204
  except Exception as e:
205
  st.error(f"Upsi, an unexpected error occurred: {e}")
206
  # Optionally log the exception details to a file or error tracking service
207
 
208
 
209
- def display_chat_history(chat_history):
210
- for chat in chat_history:
211
- background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
212
- st.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
 
215
  if __name__ == "__main__":
216
- main()
 
15
  from langchain.callbacks import get_openai_callback
16
  import os
17
 
18
+ import pandas as pd
19
+ import pydeck as pdk
20
+ from urllib.error import URLError
21
+
22
# Initialize session state variables.
# Each page keeps its own chat history list; a key is only created when it is
# missing so that an existing history is not reset.
for _history_key in ("chat_history_page1", "chat_history_page2"):
    if _history_key not in st.session_state:
        st.session_state[_history_key] = []
28
 
29
 
 
 
30
 
31
  # Step 1: Clone the Dataset Repository
32
  repo = Repository(
 
40
  # Step 2: Load the PDF File
41
  pdf_path = "Private_Book/141123_Kombi_compressed.pdf" # Replace with your PDF file path
42
 
43
+ # Step 2b: Path of the second PDF (German coding guidelines) used by page 2
44
+ pdf_path2 = "Private_Book/Deutsche_Kodierrichtlinien_23.pdf" # Replace with your PDF file path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
 
46
 
47
+ api_key = os.getenv("OPENAI_API_KEY")
48
+ # The API key is read from the OPENAI_API_KEY environment variable (not from st.secrets)
49
 
 
 
50
 
51
 
52
+ # Updated caching mechanism using st.cache_data
53
@st.cache_data(persist="disk")  # Using persist="disk" to save cache across sessions
def load_vector_store(file_path, store_name, force_reload=False):
    """Return the FAISS vector store for a PDF, building it when necessary.

    The store is pickled next to the app as ``<store_name>.pkl``. If that file
    exists and ``force_reload`` is false it is loaded; otherwise the PDF text
    is split into chunks, embedded and indexed, and the result is pickled.

    Args:
        file_path: Path of the PDF whose text is indexed.
        store_name: Base name (without ``.pkl``) of the pickle file.
        force_reload: Rebuild the store even if a pickle file already exists
            (e.g. when the PDF changes).

    Returns:
        The vector store object created by ``FAISS.from_texts`` or restored
        from the pickle file.
    """
    store_file = f"{store_name}.pkl"

    if not force_reload and os.path.exists(store_file):
        # NOTE(security): pickle.load can execute arbitrary code contained in
        # the file; only load store files this app has written itself.
        with open(store_file, "rb") as f:
            return pickle.load(f)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )

    text = load_pdf_text(file_path)
    chunks = text_splitter.split_text(text=text)

    embeddings = OpenAIEmbeddings()
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)

    # Write to a temporary file first and rename it afterwards, so that an
    # interrupted dump never leaves a truncated pickle under the final name
    # (which would pass the existence check above and then fail to load).
    temp_file = f"{store_file}.tmp"
    with open(temp_file, "wb") as f:
        pickle.dump(vector_store, f)
    os.replace(temp_file, store_file)

    return vector_store
76
 
77
  # Utility function to load text from a PDF
78
  def load_pdf_text(file_path):
 
85
def load_chatbot():
    """Create the question-answering chain used to answer user queries.

    Returns:
        The chain built by ``load_qa_chain`` with an ``OpenAI`` LLM and the
        "stuff" chain type.
    """
    return load_qa_chain(llm=OpenAI(), chain_type="stuff")
87
 
88
+
89
def display_chat_history(chat_history):
    """Render every stored chat message as a coloured bubble.

    Args:
        chat_history: Iterable of ``(sender, message, status)`` entries, where
            ``sender`` is e.g. "User" or "Bot". The status flag does not
            influence rendering, because every message uses the same
            background colour.
    """
    # Local import keeps this function self-contained.
    from html import escape

    # The original expression selected "#ffeecf" in all three branches.
    background_color = "#ffeecf"

    for chat in chat_history:
        # Messages contain user input and model output; escape them because
        # the markup is rendered with unsafe_allow_html=True.
        sender = escape(str(chat[0]))
        message = escape(str(chat[1]))
        st.markdown(
            f"<div style='background-color: {background_color}; padding: 10px; "
            f"border-radius: 10px; margin: 10px;'>{sender}: {message}</div>",
            unsafe_allow_html=True,
        )
93
+
94
+
95
+
96
+
97
+ def page1():
98
  try:
99
  hide_streamlit_style = """
100
  <style>
 
115
  image = Image.open('BinDoc Logo (Quadratisch).png')
116
  st.image(image, use_column_width='always')
117
 
118
+
 
 
119
  # Start tracking user interactions
120
  with streamlit_analytics.track():
121
  if not os.path.exists(pdf_path):
122
  st.error("File not found. Please check the file path.")
123
  return
124
 
125
+ VectorStore = load_vector_store(pdf_path, "vector_store_page1", force_reload=False)
126
+
127
+ display_chat_history(st.session_state['chat_history_page1'])
 
 
 
 
128
 
129
  st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
130
  st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
 
158
 
159
 
160
  if query:
161
+ st.session_state['chat_history_page1'].append(("User", query, "new"))
162
 
163
  # Start timing
164
  start_time = time.time()
 
180
  # You can use Streamlit's text function to display the timing
181
  st.text(f"Response time: {duration:.2f} seconds")
182
 
183
+ st.session_state['chat_history_page1'].append(("Bot", response, "new"))
184
 
185
 
186
  # Display new messages at the bottom
187
+ new_messages = st.session_state['chat_history_page1'][-2:]
188
  for chat in new_messages:
189
  background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
190
  new_messages_placeholder.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
 
194
  query = ""
195
 
196
  # Mark all messages as old after displaying
197
+ st.session_state['chat_history_page1'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history_page1']]
198
 
199
  except Exception as e:
200
  st.error(f"Upsi, an unexpected error occurred: {e}")
201
  # Optionally log the exception details to a file or error tracking service
202
 
203
 
204
+
205
+
206
def page2():
    """Render the "Kodieren statt Frustrieren!" page backed by ``pdf_path2``.

    Shows the title and logo, the stored chat history for this page, a text
    input and six preset question buttons. When a query is present, the three
    most similar chunks are fetched from the vector store, passed to the QA
    chain, and the question and answer are appended to
    ``st.session_state['chat_history_page2']`` and displayed.

    Any exception raised while rendering is reported with ``st.error``.
    """
    # Local import keeps this function self-contained.
    from html import escape

    # Each preset question serves both as the button label and as the query.
    # One inner tuple per layout column, in display order.
    preset_questions = (
        (
            "Wann kodiere ich etwas als Hauptdiagnose und wann als Nebendiagnose?",
            "Ein Patient wird mit Aszites bei bekannter Leberzirrhose stationär aufgenommen. Es wird nur der Aszites durch eine Punktion behandelt.Wie kodiere ich das?",
            "Hauptdiagnose: Hirntumor wie kodiere ich das?",
        ),
        (
            "Welche Prozeduren werden normalerweise nicht verschlüsselt?",
            "Was muss ich bei der Kodierung der Folgezusänden von Krankheiten beachten?",
            "Was mache ich bei einer Verdachtsdiagnose, wenn mein Patien nach Hause entlassen wird?",
        ),
    )
    bubble_style = (
        "background-color: #ffeecf; padding: 10px; "
        "border-radius: 10px; margin: 10px;"
    )

    try:
        hide_streamlit_style = (
            "\n"
            "<style>\n"
            "#MainMenu {visibility: hidden;}\n"
            "footer {visibility: hidden;}\n"
            "</style>\n"
        )
        st.markdown(hide_streamlit_style, unsafe_allow_html=True)

        # Create columns for layout
        col1, col2 = st.columns([3, 1])  # Adjust the ratio to your liking

        with col1:
            st.title("Kodieren statt Frustrieren!")

        with col2:
            # Load and display the image in the right column, which will be
            # the top-right corner of the page
            image = Image.open('BinDoc Logo (Quadratisch).png')
            st.image(image, use_column_width='always')

        # Start tracking user interactions
        with streamlit_analytics.track():
            if not os.path.exists(pdf_path2):
                st.error("File not found. Please check the file path.")
                return

            VectorStore = load_vector_store(pdf_path2, "vector_store_page2", force_reload=False)

            display_chat_history(st.session_state['chat_history_page2'])

            st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
            st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
            st.write("<!-- End Spacer -->", unsafe_allow_html=True)

            new_messages_placeholder = st.empty()

            query = st.text_input("Ask questions about your PDF file (in any preferred language):")

            add_vertical_space(2)  # Adjust as per the desired spacing

            # Two columns of preset-question buttons; a clicked button
            # overrides whatever is in the text input.
            for column, questions in zip(st.columns(2), preset_questions):
                with column:
                    for question in questions:
                        if st.button(question):
                            query = question

            if query:
                st.session_state['chat_history_page2'].append(("User", query, "new"))

                # Start timing
                start_time = time.time()

                with st.spinner('Bot is thinking...'):
                    chain = load_chatbot()
                    docs = VectorStore.similarity_search(query=query, k=3)
                    with get_openai_callback():
                        response = chain.run(input_documents=docs, question=query)

                # Stop timing and show the elapsed time
                duration = time.time() - start_time
                st.text(f"Response time: {duration:.2f} seconds")

                st.session_state['chat_history_page2'].append(("Bot", response, "new"))

                # Display the new question/answer pair. The placeholder holds
                # a single element, so both bubbles are written in one call;
                # writing them one by one would leave only the last visible.
                new_messages = st.session_state['chat_history_page2'][-2:]
                bubbles = "".join(
                    f"<div style='{bubble_style}'>"
                    f"{escape(str(chat[0]))}: {escape(str(chat[1]))}</div>"
                    for chat in new_messages
                )
                new_messages_placeholder.markdown(bubbles, unsafe_allow_html=True)

            # Mark all messages as old after displaying
            st.session_state['chat_history_page2'] = [
                (sender, msg, "old")
                for sender, msg, _ in st.session_state['chat_history_page2']
            ]

    except Exception as e:
        st.error(f"Upsi, an unexpected error occurred: {e}")
        # Optionally log the exception details to a file or error tracking service
314
+
315
+
316
+
317
+
318
+
319
def main():
    """Draw the sidebar and render the page selected there.

    The sidebar shows the company title, a short description, a select box
    for choosing the page and a footer line. The chosen page's render
    function is then called for the main area.
    """
    # Single source of truth for the page names and their render functions,
    # so the select box options and the dispatch below cannot drift apart.
    pages = {
        "Document Analysis Bot": page1,
        "Coding Assistance Bot": page2,
    }

    # Sidebar content
    with st.sidebar:
        st.title('BinDoc GmbH')
        st.markdown("Experience revolutionary interaction with BinDocs Chat App, leveraging state-of-the-art AI technology.")
        add_vertical_space(1)
        page = st.sidebar.selectbox("Choose a page", list(pages))
        add_vertical_space(1)
        st.write('Made with ❤️ by BinDoc GmbH')

    # Main area content based on page selection
    render_page = pages.get(page)
    if render_page is not None:
        render_page()
334
 
335
 
336
  if __name__ == "__main__":
337
+ main()