Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -16,12 +16,13 @@ from langchain.callbacks import get_openai_callback
|
|
16 |
import os
|
17 |
import uuid
|
18 |
import json
|
|
|
|
|
|
|
19 |
import pandas as pd
|
20 |
import pydeck as pdk
|
21 |
from urllib.error import URLError
|
22 |
|
23 |
-
|
24 |
-
|
25 |
# Initialize session state variables
|
26 |
if 'chat_history_page1' not in st.session_state:
|
27 |
st.session_state['chat_history_page1'] = []
|
@@ -59,8 +60,6 @@ repo.git_pull() # Pull the latest changes (if any)
|
|
59 |
|
60 |
|
61 |
# Step 2: Load the PDF File
|
62 |
-
|
63 |
-
|
64 |
pdf_path = "Private_Book/KH_Reform230124.pdf" # Replace with your PDF file path
|
65 |
|
66 |
pdf_path2 = "Private_Book/Buch_23012024.pdf"
|
@@ -70,21 +69,6 @@ pdf_path3 = "Private_Book/Kosten_Strukturdaten_RAG_vorbereited.pdf"
|
|
70 |
api_key = os.getenv("OPENAI_API_KEY")
|
71 |
# The API key is read from the OPENAI_API_KEY environment variable (os.getenv), not from st.secrets
|
72 |
|
73 |
-
import chromadb
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
@st.cache_data
|
78 |
-
def extract_text_from_pdf(pdf_path):
|
79 |
-
text = ""
|
80 |
-
reader = PdfReader(pdf_path)
|
81 |
-
for page in reader.pages:
|
82 |
-
text += page.extract_text() + " " # Concatenate text from each page
|
83 |
-
return text
|
84 |
-
|
85 |
-
# Use the function to get pdf_text
|
86 |
-
pdf_text = extract_text_from_pdf(pdf_path3)
|
87 |
-
|
88 |
|
89 |
|
90 |
@st.cache_resource
|
@@ -132,23 +116,19 @@ def load_vector_store(file_path, store_name, force_reload=False):
|
|
132 |
return VectorStore
|
133 |
|
134 |
|
135 |
-
|
136 |
-
|
137 |
-
@st.cache_resource
|
138 |
def load_pdf_text(file_path):
|
139 |
pdf_reader = PdfReader(file_path)
|
140 |
text = ""
|
141 |
for page in pdf_reader.pages:
|
142 |
-
text += page.extract_text() or ""
|
143 |
return text
|
144 |
|
145 |
-
|
146 |
-
@st.cache_resource
|
147 |
def load_chatbot():
|
|
|
148 |
return load_qa_chain(llm=OpenAI(model_name="gpt-3.5-turbo-instruct"), chain_type="stuff")
|
149 |
|
150 |
|
151 |
-
|
152 |
def display_chat_history(chat_history):
|
153 |
for chat in chat_history:
|
154 |
background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
|
@@ -499,38 +479,13 @@ def page2():
|
|
499 |
|
500 |
|
501 |
|
502 |
-
# Correcting the indentation error and completing the CromA database integration in page3()
|
503 |
-
|
504 |
def page3():
|
505 |
try:
|
506 |
-
|
507 |
-
|
508 |
-
#MainMenu {visibility: hidden;}
|
509 |
-
footer {visibility: hidden;}
|
510 |
-
</style>
|
511 |
-
"""
|
512 |
-
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|
513 |
-
|
514 |
-
# Create columns for layout
|
515 |
-
col1, col2 = st.columns([3, 1]) # Adjust the ratio to your liking
|
516 |
-
|
517 |
-
with col1:
|
518 |
-
st.title("Kosten- und Strukturdaten der Krankenhäuser")
|
519 |
-
|
520 |
-
with col2:
|
521 |
-
# Load and display the image in the right column, which will be the top-right corner of the page
|
522 |
-
image = Image.open('BinDoc Logo (Quadratisch).png')
|
523 |
-
st.image(image, use_column_width='always')
|
524 |
|
525 |
-
|
526 |
-
if not os.path.exists(pdf_path2):
|
527 |
-
st.error("File not found. Please check the file path.")
|
528 |
-
return
|
529 |
-
|
530 |
-
# Initialize CromA client
|
531 |
chroma_client = chromadb.Client()
|
532 |
-
|
533 |
-
# Check if the collection already exists
|
534 |
try:
|
535 |
collection = chroma_client.create_collection(name="Kosten_Strukturdaten0602204")
|
536 |
except Exception as e:
|
@@ -539,8 +494,7 @@ def page3():
|
|
539 |
else:
|
540 |
raise e
|
541 |
|
542 |
-
# Add documents to the collection
|
543 |
-
# It's important to make sure this step doesn't repeat unnecessarily on each rerun
|
544 |
if "documents_added" not in st.session_state:
|
545 |
collection.add(
|
546 |
documents=[pdf_text],
|
@@ -549,49 +503,41 @@ def page3():
|
|
549 |
)
|
550 |
st.session_state["documents_added"] = True
|
551 |
|
552 |
-
|
553 |
display_chat_history(st.session_state['chat_history_page3'])
|
554 |
-
|
|
|
555 |
query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")
|
556 |
|
557 |
# Handling query input
|
558 |
if query:
|
559 |
full_query = ask_bot(query)
|
560 |
st.session_state['chat_history_page3'].append(("User", query, "new"))
|
561 |
-
|
562 |
-
#
|
563 |
-
start_time = time.time()
|
564 |
-
|
565 |
-
# Querying the CromA collection
|
566 |
results = collection.query(
|
567 |
query_texts=[full_query],
|
568 |
n_results=5 # Adjust the number of results as needed
|
569 |
)
|
570 |
-
|
571 |
-
# Calculate the response duration
|
572 |
-
end_time = time.time()
|
573 |
-
duration = end_time - start_time
|
574 |
-
|
575 |
# Process and display response from Chroma results
|
576 |
if results:
|
577 |
-
# TODO: Adjust the following logic based on CromA's actual result structure
|
578 |
response = f"Top result: {results[0]['text']}" # Example response using the first result
|
579 |
else:
|
580 |
response = "No results found for your query."
|
581 |
-
|
582 |
st.session_state['chat_history_page3'].append(("Eve", response, "new"))
|
583 |
|
584 |
-
#
|
585 |
-
|
586 |
-
|
|
|
|
|
587 |
|
588 |
except Exception as e:
|
589 |
st.error(f"An error occurred: {repr(e)}")
|
590 |
|
591 |
|
592 |
-
|
593 |
-
|
594 |
-
|
595 |
def page4():
|
596 |
try:
|
597 |
st.header(":mailbox: Kontakt & Feedback!")
|
|
|
16 |
import os
|
17 |
import uuid
|
18 |
import json
|
19 |
+
import chromadb
|
20 |
+
|
21 |
+
|
22 |
import pandas as pd
|
23 |
import pydeck as pdk
|
24 |
from urllib.error import URLError
|
25 |
|
|
|
|
|
26 |
# Initialize session state variables
|
27 |
if 'chat_history_page1' not in st.session_state:
|
28 |
st.session_state['chat_history_page1'] = []
|
|
|
60 |
|
61 |
|
62 |
# Step 2: Load the PDF File
|
|
|
|
|
63 |
pdf_path = "Private_Book/KH_Reform230124.pdf" # Replace with your PDF file path
|
64 |
|
65 |
pdf_path2 = "Private_Book/Buch_23012024.pdf"
|
|
|
69 |
api_key = os.getenv("OPENAI_API_KEY")
|
70 |
# The API key is read from the OPENAI_API_KEY environment variable (os.getenv), not from st.secrets
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
|
74 |
@st.cache_resource
|
|
|
116 |
return VectorStore
|
117 |
|
118 |
|
119 |
+
# Utility function to load text from a PDF
|
|
|
|
|
120 |
def load_pdf_text(file_path):
|
121 |
pdf_reader = PdfReader(file_path)
|
122 |
text = ""
|
123 |
for page in pdf_reader.pages:
|
124 |
+
text += page.extract_text() or "" # Add fallback for pages where text extraction fails
|
125 |
return text
|
126 |
|
|
|
|
|
127 |
def load_chatbot():
|
128 |
+
#return load_qa_chain(llm=OpenAI(), chain_type="stuff")
|
129 |
return load_qa_chain(llm=OpenAI(model_name="gpt-3.5-turbo-instruct"), chain_type="stuff")
|
130 |
|
131 |
|
|
|
132 |
def display_chat_history(chat_history):
|
133 |
for chat in chat_history:
|
134 |
background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
|
|
|
479 |
|
480 |
|
481 |
|
|
|
|
|
482 |
def page3():
|
483 |
try:
|
484 |
+
# Basic layout setup
|
485 |
+
st.title("Kosten- und Strukturdaten der Krankenhäuser")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
486 |
|
487 |
+
# Initialize Chroma client and handle collection
|
|
|
|
|
|
|
|
|
|
|
488 |
chroma_client = chromadb.Client()
|
|
|
|
|
489 |
try:
|
490 |
collection = chroma_client.create_collection(name="Kosten_Strukturdaten0602204")
|
491 |
except Exception as e:
|
|
|
494 |
else:
|
495 |
raise e
|
496 |
|
497 |
+
# Add documents to the collection if not already done
|
|
|
498 |
if "documents_added" not in st.session_state:
|
499 |
collection.add(
|
500 |
documents=[pdf_text],
|
|
|
503 |
)
|
504 |
st.session_state["documents_added"] = True
|
505 |
|
506 |
+
# Display chat history
|
507 |
display_chat_history(st.session_state['chat_history_page3'])
|
508 |
+
|
509 |
+
# User query input
|
510 |
query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")
|
511 |
|
512 |
# Handling query input
|
513 |
if query:
|
514 |
full_query = ask_bot(query)
|
515 |
st.session_state['chat_history_page3'].append(("User", query, "new"))
|
516 |
+
|
517 |
+
# Query the Chroma collection
|
|
|
|
|
|
|
518 |
results = collection.query(
|
519 |
query_texts=[full_query],
|
520 |
n_results=5 # Adjust the number of results as needed
|
521 |
)
|
522 |
+
|
|
|
|
|
|
|
|
|
523 |
# Process and display response from Chroma results
|
524 |
if results:
|
|
|
525 |
response = f"Top result: {results[0]['text']}" # Example response using the first result
|
526 |
else:
|
527 |
response = "No results found for your query."
|
528 |
+
|
529 |
st.session_state['chat_history_page3'].append(("Eve", response, "new"))
|
530 |
|
531 |
+
# Display new messages at the bottom
|
532 |
+
new_messages = st.session_state['chat_history_page3'][-2:]
|
533 |
+
for chat in new_messages:
|
534 |
+
background_color = "#ffeecf"
|
535 |
+
st.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
|
536 |
|
537 |
except Exception as e:
|
538 |
st.error(f"An error occurred: {repr(e)}")
|
539 |
|
540 |
|
|
|
|
|
|
|
541 |
def page4():
|
542 |
try:
|
543 |
st.header(":mailbox: Kontakt & Feedback!")
|