Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -1,5 +1,6 @@ | |
| 1 | 
             
            import streamlit as st
         | 
| 2 | 
             
            import time
         | 
|  | |
| 3 | 
             
            from dotenv import load_dotenv
         | 
| 4 | 
             
            import pickle
         | 
| 5 | 
             
            from huggingface_hub import Repository
         | 
| @@ -17,15 +18,13 @@ import os | |
| 17 | 
             
            repo = Repository(
         | 
| 18 | 
             
                local_dir="Private_Book",  # Local directory to clone the repository
         | 
| 19 | 
             
                repo_type="dataset",  # Specify that this is a dataset repository
         | 
| 20 | 
            -
                
         | 
| 21 | 
             
                clone_from="Anne31415/Private_Book",  # Replace with your repository URL
         | 
| 22 | 
            -
                
         | 
| 23 | 
             
                token=os.environ["HUB_TOKEN"]  # Use the secret token to authenticate
         | 
| 24 | 
             
            )
         | 
| 25 | 
             
            repo.git_pull()  # Pull the latest changes (if any)
         | 
| 26 |  | 
| 27 | 
             
            # Step 2: Load the PDF File
         | 
| 28 | 
            -
             | 
| 29 |  | 
| 30 | 
             
            with st.sidebar:
         | 
| 31 | 
             
                st.title('BinDoc GmbH')
         | 
| @@ -50,136 +49,140 @@ with st.sidebar: | |
| 50 | 
             
                api_key = os.getenv("OPENAI_API_KEY")
         | 
| 51 | 
             
                # Retrieve the API key from st.secrets
         | 
| 52 |  | 
| 53 | 
            -
             | 
| 54 | 
            -
             | 
| 55 | 
            -
             | 
| 56 | 
            -
             | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
| 59 | 
            -
             | 
| 60 | 
            -
             | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 | 
            -
                if os.path.exists(f"{store_name}.pkl"):
         | 
| 70 | 
            -
                    with open(f"{store_name}.pkl", "rb") as f:
         | 
| 71 | 
            -
                        VectorStore = pickle.load(f)
         | 
| 72 | 
            -
                else:
         | 
| 73 | 
             
                    embeddings = OpenAIEmbeddings()
         | 
| 74 | 
             
                    VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
         | 
| 75 | 
             
                    with open(f"{store_name}.pkl", "wb") as f:
         | 
| 76 | 
             
                        pickle.dump(VectorStore, f)
         | 
|  | |
|  | |
|  | |
| 77 |  | 
| 78 | 
             
                return VectorStore
         | 
| 79 |  | 
| 80 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 81 |  | 
| 82 | 
             
            def load_chatbot():
         | 
| 83 | 
             
                return load_qa_chain(llm=OpenAI(), chain_type="stuff")
         | 
| 84 |  | 
| 85 | 
             
            def main():
         | 
| 86 | 
            -
             | 
| 87 | 
            -
             | 
| 88 | 
            -
             | 
| 89 | 
            -
             | 
| 90 | 
            -
             | 
| 91 | 
            -
             | 
| 92 | 
            -
             | 
| 93 | 
            -
             | 
| 94 | 
            -
             | 
| 95 | 
            -
             | 
| 96 | 
            -
                # Main content
         | 
| 97 | 
            -
                st.title("Welcome to BinDocs ChatBot! 🤖")
         | 
| 98 |  | 
| 99 | 
            -
             | 
| 100 | 
            -
             | 
| 101 | 
            -
                 | 
| 102 | 
            -
                     | 
| 103 | 
            -
                     | 
| 104 | 
            -
             | 
| 105 | 
            -
             | 
| 106 | 
            -
             | 
| 107 | 
            -
             | 
| 108 | 
            -
             | 
| 109 | 
            -
             | 
| 110 | 
            -
                 | 
| 111 | 
            -
             | 
| 112 | 
            -
             | 
| 113 | 
            -
             | 
| 114 | 
            -
             | 
| 115 | 
            -
             | 
| 116 | 
            -
             | 
| 117 | 
            -
             | 
| 118 | 
            -
             | 
| 119 | 
            -
             | 
| 120 | 
            -
                         | 
| 121 | 
            -
             | 
| 122 | 
            -
                        query = " | 
| 123 | 
            -
                    if st.button("Was sind die Vorteile des ambulanten operierens?"):
         | 
| 124 | 
            -
                        query = "Was sind die Vorteile des ambulanten operierens?"
         | 
| 125 | 
            -
                    if st.button("Was kann ich mit dem Prognose-Analyse Toll machen?"):
         | 
| 126 | 
            -
                        query = "Was kann ich mit dem Prognose-Analyse Toll machen?"
         | 
| 127 | 
            -
                    if st.button("Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"):
         | 
| 128 | 
            -
                        query = "Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"
         | 
| 129 | 
            -
                    if st.button("Ich habe mein Meta Password vergessen, wie kann ich es zurücksetzen?"):
         | 
| 130 | 
            -
                        query = ("Ich habe mein Meta Password vergessen, wie kann ich es zurücksetzen?")
         | 
| 131 | 
            -
             | 
| 132 |  | 
| 133 | 
            -
             | 
| 134 | 
            -
             | 
| 135 | 
            -
             | 
| 136 | 
            -
             | 
| 137 | 
            -
                         | 
| 138 | 
            -
             | 
| 139 | 
            -
                         | 
| 140 | 
            -
             | 
| 141 | 
            -
             | 
| 142 | 
            -
             | 
| 143 | 
            -
                         | 
| 144 | 
            -
             | 
| 145 | 
            -
             | 
| 146 | 
            -
             | 
| 147 | 
            -
             | 
| 148 | 
            -
             | 
| 149 | 
            -
             | 
| 150 | 
            -
             | 
| 151 | 
            -
             | 
| 152 | 
            -
             | 
| 153 | 
            -
             | 
| 154 | 
            -
             | 
| 155 | 
            -
             | 
| 156 | 
            -
             | 
| 157 | 
            -
             | 
| 158 | 
            -
             | 
| 159 | 
            -
             | 
| 160 | 
            -
             | 
| 161 | 
            -
             | 
| 162 | 
            -
             | 
| 163 | 
            -
                             | 
| 164 | 
            -
                             | 
| 165 | 
            -
             | 
| 166 | 
            -
             | 
| 167 | 
            -
             | 
| 168 | 
            -
             | 
| 169 | 
            -
             | 
| 170 | 
            -
             | 
| 171 | 
            -
             | 
| 172 | 
            -
             | 
| 173 | 
            -
             | 
| 174 | 
            -
             | 
| 175 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 176 |  | 
|  | |
|  | |
|  | |
| 177 |  | 
| 178 |  | 
| 179 | 
             
            def display_chat_history(chat_history):
         | 
| 180 | 
             
                for chat in chat_history:
         | 
| 181 | 
            -
                    background_color = "# | 
| 182 | 
             
                    st.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
         | 
| 183 |  | 
|  | |
| 184 | 
             
            if __name__ == "__main__":
         | 
| 185 | 
             
                main()
         | 
|  | |
| 1 | 
             
            import streamlit as st
         | 
| 2 | 
             
            import time
         | 
| 3 | 
            +
            import streamlit_analytics
         | 
| 4 | 
             
            from dotenv import load_dotenv
         | 
| 5 | 
             
            import pickle
         | 
| 6 | 
             
            from huggingface_hub import Repository
         | 
|  | |
| 18 | 
             
            repo = Repository(
         | 
| 19 | 
             
                local_dir="Private_Book",  # Local directory to clone the repository
         | 
| 20 | 
             
                repo_type="dataset",  # Specify that this is a dataset repository
         | 
|  | |
| 21 | 
             
                clone_from="Anne31415/Private_Book",  # Replace with your repository URL
         | 
|  | |
| 22 | 
             
                token=os.environ["HUB_TOKEN"]  # Use the secret token to authenticate
         | 
| 23 | 
             
            )
         | 
| 24 | 
             
            repo.git_pull()  # Pull the latest changes (if any)
         | 
| 25 |  | 
| 26 | 
             
            # Step 2: Load the PDF File
         | 
| 27 | 
            +
            pdf_path = "Private_Book/KOMBI_all2.pdf"  # Replace with your PDF file path
         | 
| 28 |  | 
| 29 | 
             
            with st.sidebar:
         | 
| 30 | 
             
                st.title('BinDoc GmbH')
         | 
|  | |
| 49 | 
             
                api_key = os.getenv("OPENAI_API_KEY")
         | 
| 50 | 
             
                # Retrieve the API key from st.secrets
         | 
| 51 |  | 
| 52 | 
            +
            # Updated caching mechanism using st.cache_data
         | 
| 53 | 
            +
            @st.cache_data(persist="disk")  # Using persist="disk" to save cache across sessions
         | 
| 54 | 
            +
             | 
| 55 | 
            +
             | 
| 56 | 
            +
            def load_vector_store(file_path, store_name, force_reload=False):
         | 
| 57 | 
            +
                # Check if we need to force reload the vector store (e.g., when the PDF changes)
         | 
| 58 | 
            +
                if force_reload or not os.path.exists(f"{store_name}.pkl"):
         | 
| 59 | 
            +
                    text_splitter = RecursiveCharacterTextSplitter(
         | 
| 60 | 
            +
                        chunk_size=1000,
         | 
| 61 | 
            +
                        chunk_overlap=200,
         | 
| 62 | 
            +
                        length_function=len
         | 
| 63 | 
            +
                    )
         | 
| 64 | 
            +
                    
         | 
| 65 | 
            +
                    text = load_pdf_text(file_path)
         | 
| 66 | 
            +
                    chunks = text_splitter.split_text(text=text)
         | 
| 67 | 
            +
                    
         | 
|  | |
|  | |
|  | |
|  | |
| 68 | 
             
                    embeddings = OpenAIEmbeddings()
         | 
| 69 | 
             
                    VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
         | 
| 70 | 
             
                    with open(f"{store_name}.pkl", "wb") as f:
         | 
| 71 | 
             
                        pickle.dump(VectorStore, f)
         | 
| 72 | 
            +
                else:
         | 
| 73 | 
            +
                    with open(f"{store_name}.pkl", "rb") as f:
         | 
| 74 | 
            +
                        VectorStore = pickle.load(f)
         | 
| 75 |  | 
| 76 | 
             
                return VectorStore
         | 
| 77 |  | 
| 78 | 
            +
            # Utility function to load text from a PDF
         | 
| 79 | 
            +
            def load_pdf_text(file_path):
         | 
| 80 | 
            +
                pdf_reader = PdfReader(file_path)
         | 
| 81 | 
            +
                text = ""
         | 
| 82 | 
            +
                for page in pdf_reader.pages:
         | 
| 83 | 
            +
                    text += page.extract_text() or ""  # Add fallback for pages where text extraction fails
         | 
| 84 | 
            +
                return text
         | 
| 85 |  | 
| 86 | 
             
            def load_chatbot():
         | 
| 87 | 
             
                return load_qa_chain(llm=OpenAI(), chain_type="stuff")
         | 
| 88 |  | 
| 89 | 
             
            def main():
         | 
| 90 | 
            +
                try:
         | 
| 91 | 
            +
                    hide_streamlit_style = """
         | 
| 92 | 
            +
                            <style>
         | 
| 93 | 
            +
                            #MainMenu {visibility: hidden;}
         | 
| 94 | 
            +
                            footer {visibility: hidden;}
         | 
| 95 | 
            +
                            </style>
         | 
| 96 | 
            +
                            """
         | 
| 97 | 
            +
                    st.markdown(hide_streamlit_style, unsafe_allow_html=True)
         | 
|  | |
|  | |
|  | |
|  | |
| 98 |  | 
| 99 | 
            +
                    # Main content
         | 
| 100 | 
            +
                    st.title("Welcome to BinDocs ChatBot! 🤖")
         | 
| 101 | 
            +
                
         | 
| 102 | 
            +
                    # Start tracking user interactions
         | 
| 103 | 
            +
                    with streamlit_analytics.track():
         | 
| 104 | 
            +
                        if not os.path.exists(pdf_path):
         | 
| 105 | 
            +
                            st.error("File not found. Please check the file path.")
         | 
| 106 | 
            +
                            return
         | 
| 107 | 
            +
                
         | 
| 108 | 
            +
                        VectorStore = load_vector_store(pdf_path, "my_vector_store", force_reload=False)
         | 
| 109 | 
            +
                
         | 
| 110 | 
            +
                
         | 
| 111 | 
            +
                        if "chat_history" not in st.session_state:
         | 
| 112 | 
            +
                            st.session_state['chat_history'] = []
         | 
| 113 | 
            +
                
         | 
| 114 | 
            +
                        display_chat_history(st.session_state['chat_history'])
         | 
| 115 | 
            +
                
         | 
| 116 | 
            +
                        st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
         | 
| 117 | 
            +
                        st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
         | 
| 118 | 
            +
                        st.write("<!-- End Spacer -->", unsafe_allow_html=True)
         | 
| 119 | 
            +
                
         | 
| 120 | 
            +
                        new_messages_placeholder = st.empty()
         | 
| 121 | 
            +
                
         | 
| 122 | 
            +
                        query = st.text_input("Ask questions about your PDF file (in any preferred language):")
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 123 |  | 
| 124 | 
            +
                        if st.button("Was genau ist ein Belegarzt?"):
         | 
| 125 | 
            +
                            query = "Was genau ist ein Belegarzt?"
         | 
| 126 | 
            +
                        if st.button("Wofür wird die Alpha-ID verwendet?"):
         | 
| 127 | 
            +
                            query = "Wofür wird die Alpha-ID verwendet?"
         | 
| 128 | 
            +
                        if st.button("Was sind die Vorteile des ambulanten Operierens?"):
         | 
| 129 | 
            +
                            query = "Was sind die Vorteile des ambulanten Operierens?"
         | 
| 130 | 
            +
                        if st.button("Was kann ich mit dem Prognose-Analyse-Tool machen?"):
         | 
| 131 | 
            +
                            query = "Was kann ich mit dem Prognose-Analyse-Tool machen?"
         | 
| 132 | 
            +
                        if st.button("Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"):
         | 
| 133 | 
            +
                            query = "Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"
         | 
| 134 | 
            +
                        if st.button("Ich habe mein Meta-Password vergessen, wie kann ich es zurücksetzen?"):
         | 
| 135 | 
            +
                            query = "Ich habe mein Meta-Password vergessen, wie kann ich es zurücksetzen?"
         | 
| 136 | 
            +
                    
         | 
| 137 | 
            +
                        if query:
         | 
| 138 | 
            +
                            st.session_state['chat_history'].append(("User", query, "new"))
         | 
| 139 | 
            +
             | 
| 140 | 
            +
                            # Start timing
         | 
| 141 | 
            +
                            start_time = time.time()
         | 
| 142 | 
            +
                            
         | 
| 143 | 
            +
                            with st.spinner('Bot is thinking...'):
         | 
| 144 | 
            +
                                # Use the VectorStore loaded at the start from the session state
         | 
| 145 | 
            +
                                chain = load_chatbot()
         | 
| 146 | 
            +
                                docs = VectorStore.similarity_search(query=query, k=3)
         | 
| 147 | 
            +
                                with get_openai_callback() as cb:
         | 
| 148 | 
            +
                                    response = chain.run(input_documents=docs, question=query)
         | 
| 149 | 
            +
             | 
| 150 | 
            +
                                    
         | 
| 151 | 
            +
                            # Stop timing
         | 
| 152 | 
            +
                            end_time = time.time()
         | 
| 153 | 
            +
                            
         | 
| 154 | 
            +
                            # Calculate duration
         | 
| 155 | 
            +
                            duration = end_time - start_time
         | 
| 156 | 
            +
             | 
| 157 | 
            +
                            # You can use Streamlit's text function to display the timing
         | 
| 158 | 
            +
                            st.text(f"Response time: {duration:.2f} seconds")
         | 
| 159 | 
            +
                
         | 
| 160 | 
            +
                            st.session_state['chat_history'].append(("Bot", response, "new"))
         | 
| 161 | 
            +
                
         | 
| 162 | 
            +
                
         | 
| 163 | 
            +
                            # Display new messages at the bottom
         | 
| 164 | 
            +
                            new_messages = st.session_state['chat_history'][-2:]
         | 
| 165 | 
            +
                            for chat in new_messages:
         | 
| 166 | 
            +
                                background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
         | 
| 167 | 
            +
                                new_messages_placeholder.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
         | 
| 168 | 
            +
                
         | 
| 169 | 
            +
                
         | 
| 170 | 
            +
                            # Clear the input field after the query is made
         | 
| 171 | 
            +
                            query = ""
         | 
| 172 | 
            +
                
         | 
| 173 | 
            +
                        # Mark all messages as old after displaying
         | 
| 174 | 
            +
                        st.session_state['chat_history'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history']]
         | 
| 175 |  | 
| 176 | 
            +
                except Exception as e:
         | 
| 177 | 
            +
                    st.error(f"Upsi, an unexpected error occurred: {e}")
         | 
| 178 | 
            +
                    # Optionally log the exception details to a file or error tracking service
         | 
| 179 |  | 
| 180 |  | 
| 181 | 
             
            def display_chat_history(chat_history):
         | 
| 182 | 
             
                for chat in chat_history:
         | 
| 183 | 
            +
                    background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
         | 
| 184 | 
             
                    st.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
         | 
| 185 |  | 
| 186 | 
            +
             | 
| 187 | 
             
            if __name__ == "__main__":
         | 
| 188 | 
             
                main()
         |