Spaces:

Anne31415
/

Public_BookBot

Sleeping

App Files Files Community

Anne31415 committed on Feb 6, 2024

Commit

9844e99

verified ·

1 Parent(s): 72b3b49

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -76

app.py CHANGED Viewed

@@ -16,12 +16,13 @@ from langchain.callbacks import get_openai_callback
 import os
 import uuid
 import json
 import pandas as pd
 import pydeck as pdk
 from urllib.error import URLError
 # Initialize session state variables
 if 'chat_history_page1' not in st.session_state:
     st.session_state['chat_history_page1'] = []
@@ -59,8 +60,6 @@ repo.git_pull()  # Pull the latest changes (if any)
 # Step 2: Load the PDF File
 pdf_path = "Private_Book/KH_Reform230124.pdf"  # Replace with your PDF file path
 pdf_path2 = "Private_Book/Buch_23012024.pdf"
@@ -70,21 +69,6 @@ pdf_path3 = "Private_Book/Kosten_Strukturdaten_RAG_vorbereited.pdf"
 api_key = os.getenv("OPENAI_API_KEY")
 # Retrieve the API key from st.secrets
-import chromadb
-@st.cache_data
-def extract_text_from_pdf(pdf_path):
-    text = ""
-    reader = PdfReader(pdf_path)
-    for page in reader.pages:
-        text += page.extract_text() + " "  # Concatenate text from each page
-    return text
-# Use the function to get pdf_text
-pdf_text = extract_text_from_pdf(pdf_path3)
 @st.cache_resource
@@ -132,23 +116,19 @@ def load_vector_store(file_path, store_name, force_reload=False):
     return VectorStore
-@st.cache_resource
 def load_pdf_text(file_path):
     pdf_reader = PdfReader(file_path)
     text = ""
     for page in pdf_reader.pages:
-        text += page.extract_text() or ""
     return text
-@st.cache_resource
 def load_chatbot():
     return load_qa_chain(llm=OpenAI(model_name="gpt-3.5-turbo-instruct"), chain_type="stuff")
 def display_chat_history(chat_history):
     for chat in chat_history:
         background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
@@ -499,38 +479,13 @@ def page2():
-# Correcting the indentation error and completing the CromA database integration in page3()
 def page3():
     try:
-        hide_streamlit_style = """
-                <style>
-                #MainMenu {visibility: hidden;}
-                footer {visibility: hidden;}
-                </style>
-                """
-        st.markdown(hide_streamlit_style, unsafe_allow_html=True)
-         # Create columns for layout
-        col1, col2 = st.columns([3, 1])  # Adjust the ratio to your liking
-        with col1:
-            st.title("Kosten- und Strukturdaten der Krankenhäuser")
-        with col2:
-            # Load and display the image in the right column, which will be the top-right corner of the page
-            image = Image.open('BinDoc Logo (Quadratisch).png')
-            st.image(image, use_column_width='always')
-        if not os.path.exists(pdf_path2):
-            st.error("File not found. Please check the file path.")
-            return
-        # Initialize CromA client
         chroma_client = chromadb.Client()
-        # Check if the collection already exists
         try:
             collection = chroma_client.create_collection(name="Kosten_Strukturdaten0602204")
         except Exception as e:
@@ -539,8 +494,7 @@ def page3():
             else:
                 raise e
-        # Add documents to the collection (ensure this is not done redundantly)
-        # It's important to make sure this step doesn't repeat unnecessarily on each rerun
         if "documents_added" not in st.session_state:
             collection.add(
                 documents=[pdf_text],
@@ -549,49 +503,41 @@ def page3():
             )
             st.session_state["documents_added"] = True
         display_chat_history(st.session_state['chat_history_page3'])
-        new_messages_placeholder = st.empty()
         query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")
         # Handling query input
         if query:
             full_query = ask_bot(query)
             st.session_state['chat_history_page3'].append(("User", query, "new"))
-            # Start timing for response
-            start_time = time.time()
-            # Querying the CromA collection
             results = collection.query(
                 query_texts=[full_query],
                 n_results=5  # Adjust the number of results as needed
             )
-            # Calculate the response duration
-            end_time = time.time()
-            duration = end_time - start_time
             # Process and display response from Chroma results
             if results:
-                # TODO: Adjust the following logic based on CromA's actual result structure
                 response = f"Top result: {results[0]['text']}"  # Example response using the first result
             else:
                 response = "No results found for your query."
             st.session_state['chat_history_page3'].append(("Eve", response, "new"))
-        # Simple interaction
-        if st.button("Test Button"):
-            st.write("Button clicked.")
     except Exception as e:
         st.error(f"An error occurred: {repr(e)}")
 def page4():
     try:
         st.header(":mailbox: Kontakt & Feedback!")

 import os
 import uuid
 import json
+import chromadb
 import pandas as pd
 import pydeck as pdk
 from urllib.error import URLError
 # Initialize session state variables
 if 'chat_history_page1' not in st.session_state:
     st.session_state['chat_history_page1'] = []
 # Step 2: Load the PDF File
 pdf_path = "Private_Book/KH_Reform230124.pdf"  # Replace with your PDF file path
 pdf_path2 = "Private_Book/Buch_23012024.pdf"
 api_key = os.getenv("OPENAI_API_KEY")
 # Retrieve the API key from st.secrets
 @st.cache_resource
     return VectorStore
+# Utility function to load text from a PDF
 def load_pdf_text(file_path):
     pdf_reader = PdfReader(file_path)
     text = ""
     for page in pdf_reader.pages:
+        text += page.extract_text() or ""  # Add fallback for pages where text extraction fails
     return text
 def load_chatbot():
+    #return load_qa_chain(llm=OpenAI(), chain_type="stuff")
     return load_qa_chain(llm=OpenAI(model_name="gpt-3.5-turbo-instruct"), chain_type="stuff")
 def display_chat_history(chat_history):
     for chat in chat_history:
         background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
 def page3():
     try:
+        # Basic layout setup
+        st.title("Kosten- und Strukturdaten der Krankenhäuser")
+        # Initialize Chroma client and handle collection
         chroma_client = chromadb.Client()
         try:
             collection = chroma_client.create_collection(name="Kosten_Strukturdaten0602204")
         except Exception as e:
             else:
                 raise e
+        # Add documents to the collection if not already done
         if "documents_added" not in st.session_state:
             collection.add(
                 documents=[pdf_text],
             )
             st.session_state["documents_added"] = True
+        # Display chat history
         display_chat_history(st.session_state['chat_history_page3'])
+        # User query input
         query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")
         # Handling query input
         if query:
             full_query = ask_bot(query)
             st.session_state['chat_history_page3'].append(("User", query, "new"))
+            # Query the Chroma collection
             results = collection.query(
                 query_texts=[full_query],
                 n_results=5  # Adjust the number of results as needed
             )
             # Process and display response from Chroma results
             if results:
                 response = f"Top result: {results[0]['text']}"  # Example response using the first result
             else:
                 response = "No results found for your query."
             st.session_state['chat_history_page3'].append(("Eve", response, "new"))
+        # Display new messages at the bottom
+        new_messages = st.session_state['chat_history_page3'][-2:]
+        for chat in new_messages:
+            background_color = "#ffeecf"
+            st.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
     except Exception as e:
         st.error(f"An error occurred: {repr(e)}")
 def page4():
     try:
         st.header(":mailbox: Kontakt & Feedback!")