Geministreamlitwithvision

Runtime error

App Files Community

ziyadsuper2017 committed on Dec 21, 2023

Commit

94fea6c

1 Parent(s): e742fb1

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -24

app.py CHANGED Viewed

@@ -41,8 +41,6 @@ if 'chat_history' not in st.session_state:
     st.session_state['chat_history'] = []
 if 'file_uploader_key' not in st.session_state:
     st.session_state['file_uploader_key'] = str(uuid.uuid4())
-if 'use_vision_model' not in st.session_state:
-    st.session_state['use_vision_model'] = False
 st.title("Gemini Chatbot")
@@ -57,7 +55,6 @@ def get_image_base64(image):
 def clear_conversation():
     st.session_state['chat_history'] = []
     st.session_state['file_uploader_key'] = str(uuid.uuid4())
-    st.session_state['use_vision_model'] = False
 def display_chat_history():
     for entry in st.session_state['chat_history']:
@@ -78,38 +75,56 @@ def get_chat_history_str():
     return chat_history_str
 # Send message function with TTS integration
-def send_message(tts=False):
     user_input = st.session_state.user_input
     uploaded_files = st.session_state.uploaded_files
-    # Your existing code that processes user input and uploaded files...
-    model_name = 'gemini-pro-vision' if st.session_state['use_vision_model'] else 'gemini-pro'
     model = genai.GenerativeModel(
         model_name=model_name,
         generation_config=generation_config,
         safety_settings=safety_settings
     )
     chat_history_str = "\n".join(prompts)
-    if st.session_state['use_vision_model']:
-        prompt_parts = [{"text": chat_history_str}] + [
-            {"data": part['data'], "mime_type": "image/jpeg"}
-            for entry in st.session_state['chat_history'] for part in entry['parts']
-            if 'data' in part
-        ]
-    else:
-        prompt_parts = [{"text": chat_history_str}]
     response = model.generate_content([{"role": "user", "parts": prompt_parts}])
     response_text = response.text if hasattr(response, "text") else "No response text found."
     if response_text:
         st.session_state['chat_history'].append({"role": "model", "parts": [{"text": response_text}]})
-        # If TTS is enabled, convert the response text to speech
-        if tts:
-            tts = gTTS(text=response_text, lang='en')
-            tts_file = BytesIO()
-            tts.write_to_fp(tts_file)
-            tts_file.seek(0)
-            st.audio(tts_file, format='audio/mp3')
     # Clear the input fields after sending the message
     st.session_state.user_input = ''
@@ -122,6 +137,7 @@ def send_message(tts=False):
 # User input text area
 user_input = st.text_area(
     "Enter your message here:",
     key="user_input"
 )
@@ -136,8 +152,7 @@ uploaded_files = st.file_uploader(
 # Send message button
 send_button = st.button(
     "Send",
-    on_click=send_message,
-    args=(False,)  # TTS disabled by default when clicking the Send button
 )
 # Clear conversation button
@@ -163,7 +178,6 @@ st.session_state.uploaded_files = uploaded_files
 st.markdown(
     """
     <script>
-    // Use jQuery to capture the Ctrl+Enter event and click the 'Send' button
     document.addEventListener('DOMContentLoaded', (event) => {
         document.querySelector('.stTextArea textarea').addEventListener('keydown', function(e) {
             if (e.key === 'Enter' && e.ctrlKey) {

     st.session_state['chat_history'] = []
 if 'file_uploader_key' not in st.session_state:
     st.session_state['file_uploader_key'] = str(uuid.uuid4())
 st.title("Gemini Chatbot")
 def clear_conversation():
     st.session_state['chat_history'] = []
     st.session_state['file_uploader_key'] = str(uuid.uuid4())
 def display_chat_history():
     for entry in st.session_state['chat_history']:
     return chat_history_str
 # Send message function with TTS integration
+def send_message():
     user_input = st.session_state.user_input
     uploaded_files = st.session_state.uploaded_files
+    prompts = []
+    # Populate the prompts list with the existing chat history
+    for entry in st.session_state['chat_history']:
+        for part in entry['parts']:
+            if 'text' in part:
+                prompts.append(part['text'])
+            elif 'data' in part:
+                prompts.append("[Image]")
+    # Append the user input to the prompts list
+    if user_input:
+        prompts.append(user_input)
+        st.session_state['chat_history'].append({"role": "user", "parts": [{"text": user_input}]})
+    # Handle uploaded files
+    if uploaded_files:
+        for uploaded_file in uploaded_files:
+            base64_image = get_image_base64(Image.open(uploaded_file))
+            prompts.append("[Image]")
+            st.session_state['chat_history'].append({
+                "role": "user",
+                "parts": [{"mime_type": uploaded_file.type, "data": base64_image}]
+            })
+    # Set up the model and generate a response
+    model_name = 'gemini-pro-vision' if st.session_state.get('use_vision_model', False) else 'gemini-pro'
     model = genai.GenerativeModel(
         model_name=model_name,
         generation_config=generation_config,
         safety_settings=safety_settings
     )
     chat_history_str = "\n".join(prompts)
+    prompt_parts = [{"text": chat_history_str}]
     response = model.generate_content([{"role": "user", "parts": prompt_parts}])
     response_text = response.text if hasattr(response, "text") else "No response text found."
+    # After generating the response from the model, append it to the chat history
     if response_text:
         st.session_state['chat_history'].append({"role": "model", "parts": [{"text": response_text}]})
+        # Convert the response text to speech
+        tts = gTTS(text=response_text, lang='en')
+        tts_file = BytesIO()
+        tts.write_to_fp(tts_file)
+        tts_file.seek(0)
+        st.audio(tts_file, format='audio/mp3')
     # Clear the input fields after sending the message
     st.session_state.user_input = ''
 # User input text area
 user_input = st.text_area(
     "Enter your message here:",
+    value="",
     key="user_input"
 )
 # Send message button
 send_button = st.button(
     "Send",
+    on_click=send_message
 )
 # Clear conversation button
 st.markdown(
     """
     <script>
     document.addEventListener('DOMContentLoaded', (event) => {
         document.querySelector('.stTextArea textarea').addEventListener('keydown', function(e) {
             if (e.key === 'Enter' && e.ctrlKey) {