Anne31415 committed on
Commit
9844e99
·
verified ·
1 Parent(s): 72b3b49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -76
app.py CHANGED
@@ -16,12 +16,13 @@ from langchain.callbacks import get_openai_callback
16
  import os
17
  import uuid
18
  import json
 
 
 
19
  import pandas as pd
20
  import pydeck as pdk
21
  from urllib.error import URLError
22
 
23
-
24
-
25
  # Initialize session state variables
26
  if 'chat_history_page1' not in st.session_state:
27
  st.session_state['chat_history_page1'] = []
@@ -59,8 +60,6 @@ repo.git_pull() # Pull the latest changes (if any)
59
 
60
 
61
  # Step 2: Load the PDF File
62
-
63
-
64
  pdf_path = "Private_Book/KH_Reform230124.pdf" # Replace with your PDF file path
65
 
66
  pdf_path2 = "Private_Book/Buch_23012024.pdf"
@@ -70,21 +69,6 @@ pdf_path3 = "Private_Book/Kosten_Strukturdaten_RAG_vorbereited.pdf"
70
  api_key = os.getenv("OPENAI_API_KEY")
71
  # Retrieve the API key from the OPENAI_API_KEY environment variable
72
 
73
- import chromadb
74
-
75
-
76
-
77
- @st.cache_data
78
- def extract_text_from_pdf(pdf_path):
79
- text = ""
80
- reader = PdfReader(pdf_path)
81
- for page in reader.pages:
82
- text += page.extract_text() + " " # Concatenate text from each page
83
- return text
84
-
85
- # Use the function to get pdf_text
86
- pdf_text = extract_text_from_pdf(pdf_path3)
87
-
88
 
89
 
90
  @st.cache_resource
@@ -132,23 +116,19 @@ def load_vector_store(file_path, store_name, force_reload=False):
132
  return VectorStore
133
 
134
 
135
-
136
-
137
- @st.cache_resource
138
  def load_pdf_text(file_path):
139
  pdf_reader = PdfReader(file_path)
140
  text = ""
141
  for page in pdf_reader.pages:
142
- text += page.extract_text() or ""
143
  return text
144
 
145
-
146
- @st.cache_resource
147
  def load_chatbot():
 
148
  return load_qa_chain(llm=OpenAI(model_name="gpt-3.5-turbo-instruct"), chain_type="stuff")
149
 
150
 
151
-
152
  def display_chat_history(chat_history):
153
  for chat in chat_history:
154
  background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
@@ -499,38 +479,13 @@ def page2():
499
 
500
 
501
 
502
- # Correcting the indentation error and completing the CromA database integration in page3()
503
-
504
  def page3():
505
  try:
506
- hide_streamlit_style = """
507
- <style>
508
- #MainMenu {visibility: hidden;}
509
- footer {visibility: hidden;}
510
- </style>
511
- """
512
- st.markdown(hide_streamlit_style, unsafe_allow_html=True)
513
-
514
- # Create columns for layout
515
- col1, col2 = st.columns([3, 1]) # Adjust the ratio to your liking
516
-
517
- with col1:
518
- st.title("Kosten- und Strukturdaten der Krankenhäuser")
519
-
520
- with col2:
521
- # Load and display the image in the right column, which will be the top-right corner of the page
522
- image = Image.open('BinDoc Logo (Quadratisch).png')
523
- st.image(image, use_column_width='always')
524
 
525
-
526
- if not os.path.exists(pdf_path2):
527
- st.error("File not found. Please check the file path.")
528
- return
529
-
530
- # Initialize CromA client
531
  chroma_client = chromadb.Client()
532
-
533
- # Check if the collection already exists
534
  try:
535
  collection = chroma_client.create_collection(name="Kosten_Strukturdaten0602204")
536
  except Exception as e:
@@ -539,8 +494,7 @@ def page3():
539
  else:
540
  raise e
541
 
542
- # Add documents to the collection (ensure this is not done redundantly)
543
- # It's important to make sure this step doesn't repeat unnecessarily on each rerun
544
  if "documents_added" not in st.session_state:
545
  collection.add(
546
  documents=[pdf_text],
@@ -549,49 +503,41 @@ def page3():
549
  )
550
  st.session_state["documents_added"] = True
551
 
552
-
553
  display_chat_history(st.session_state['chat_history_page3'])
554
- new_messages_placeholder = st.empty()
 
555
  query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")
556
 
557
  # Handling query input
558
  if query:
559
  full_query = ask_bot(query)
560
  st.session_state['chat_history_page3'].append(("User", query, "new"))
561
-
562
- # Start timing for response
563
- start_time = time.time()
564
-
565
- # Querying the CromA collection
566
  results = collection.query(
567
  query_texts=[full_query],
568
  n_results=5 # Adjust the number of results as needed
569
  )
570
-
571
- # Calculate the response duration
572
- end_time = time.time()
573
- duration = end_time - start_time
574
-
575
  # Process and display response from Chroma results
576
  if results:
577
- # TODO: Adjust the following logic based on CromA's actual result structure
578
  response = f"Top result: {results[0]['text']}" # Example response using the first result
579
  else:
580
  response = "No results found for your query."
581
-
582
  st.session_state['chat_history_page3'].append(("Eve", response, "new"))
583
 
584
- # Simple interaction
585
- if st.button("Test Button"):
586
- st.write("Button clicked.")
 
 
587
 
588
  except Exception as e:
589
  st.error(f"An error occurred: {repr(e)}")
590
 
591
 
592
-
593
-
594
-
595
  def page4():
596
  try:
597
  st.header(":mailbox: Kontakt & Feedback!")
 
16
  import os
17
  import uuid
18
  import json
19
+ import chromadb
20
+
21
+
22
  import pandas as pd
23
  import pydeck as pdk
24
  from urllib.error import URLError
25
 
 
 
26
  # Initialize session state variables
27
  if 'chat_history_page1' not in st.session_state:
28
  st.session_state['chat_history_page1'] = []
 
60
 
61
 
62
  # Step 2: Load the PDF File
 
 
63
  pdf_path = "Private_Book/KH_Reform230124.pdf" # Replace with your PDF file path
64
 
65
  pdf_path2 = "Private_Book/Buch_23012024.pdf"
 
69
  api_key = os.getenv("OPENAI_API_KEY")
70
  # Retrieve the API key from the OPENAI_API_KEY environment variable
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
 
74
  @st.cache_resource
 
116
  return VectorStore
117
 
118
 
119
+ # Utility function to load text from a PDF
 
 
120
  def load_pdf_text(file_path):
121
  pdf_reader = PdfReader(file_path)
122
  text = ""
123
  for page in pdf_reader.pages:
124
+ text += page.extract_text() or "" # Add fallback for pages where text extraction fails
125
  return text
126
 
 
 
127
  def load_chatbot():
128
+ #return load_qa_chain(llm=OpenAI(), chain_type="stuff")
129
  return load_qa_chain(llm=OpenAI(model_name="gpt-3.5-turbo-instruct"), chain_type="stuff")
130
 
131
 
 
132
  def display_chat_history(chat_history):
133
  for chat in chat_history:
134
  background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
 
479
 
480
 
481
 
 
 
482
  def page3():
483
  try:
484
+ # Basic layout setup
485
+ st.title("Kosten- und Strukturdaten der Krankenhäuser")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
 
487
+ # Initialize Chroma client and handle collection
 
 
 
 
 
488
  chroma_client = chromadb.Client()
 
 
489
  try:
490
  collection = chroma_client.create_collection(name="Kosten_Strukturdaten0602204")
491
  except Exception as e:
 
494
  else:
495
  raise e
496
 
497
+ # Add documents to the collection if not already done
 
498
  if "documents_added" not in st.session_state:
499
  collection.add(
500
  documents=[pdf_text],
 
503
  )
504
  st.session_state["documents_added"] = True
505
 
506
+ # Display chat history
507
  display_chat_history(st.session_state['chat_history_page3'])
508
+
509
+ # User query input
510
  query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")
511
 
512
  # Handling query input
513
  if query:
514
  full_query = ask_bot(query)
515
  st.session_state['chat_history_page3'].append(("User", query, "new"))
516
+
517
+ # Query the Chroma collection
 
 
 
518
  results = collection.query(
519
  query_texts=[full_query],
520
  n_results=5 # Adjust the number of results as needed
521
  )
522
+
 
 
 
 
523
  # Process and display response from Chroma results
524
  if results:
 
525
  response = f"Top result: {results[0]['text']}" # Example response using the first result
526
  else:
527
  response = "No results found for your query."
528
+
529
  st.session_state['chat_history_page3'].append(("Eve", response, "new"))
530
 
531
+ # Display new messages at the bottom
532
+ new_messages = st.session_state['chat_history_page3'][-2:]
533
+ for chat in new_messages:
534
+ background_color = "#ffeecf"
535
+ st.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
536
 
537
  except Exception as e:
538
  st.error(f"An error occurred: {repr(e)}")
539
 
540
 
 
 
 
541
  def page4():
542
  try:
543
  st.header(":mailbox: Kontakt & Feedback!")