Geministreamlitwithvision

Runtime error

App Files Files Community

ziyadsuper2017 committed on May 20, 2024

Commit

d5a9ec5

verified ·

1 Parent(s): 672cfd7

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -96

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ genai.configure(api_key=api_key)
 # Configure the generative AI model
 generation_config = genai.GenerationConfig(
     temperature=0.9,
-    max_output_tokens=4000
 )
 # Safety settings configuration
@@ -44,15 +44,17 @@ if 'chat_history' not in st.session_state:
 if 'file_uploader_key' not in st.session_state:
     st.session_state['file_uploader_key'] = str(uuid.uuid4())
 st.title("Gemini Chatbot")
 # Model Selection Dropdown
-selected_model = st.selectbox("Select a Gemini 1.5 model:", ["gemini-1.5-flash-latest", "gemini-1.5-pro-latest"])
 # TTS Option Checkbox
 enable_tts = st.checkbox("Enable Text-to-Speech")
-# Helper functions for image processing and chat history management
 def get_image_base64(image):
     image = image.convert("RGB")
     buffered = io.BytesIO()
@@ -69,13 +71,13 @@ def display_chat_history():
         role = entry["role"]
         parts = entry["parts"][0]
         if 'text' in parts:
-            st.markdown(f"{role.title()}: {parts['text']}")
         elif 'data' in parts:
             mime_type = parts.get('mime_type', '')
             if mime_type.startswith('image'):
-                st.image(Image.open(io.BytesIO(base64.b64decode(parts['data']))), caption='Uploaded Image')
             elif mime_type == 'application/pdf':
-                st.write("PDF Content:")
                 pdf_reader = PyPDF2.PdfReader(io.BytesIO(base64.b64decode(parts['data'])))
                 for page_num in range(len(pdf_reader.pages)):
                     page = pdf_reader.pages[page_num]
@@ -83,83 +85,37 @@ def display_chat_history():
             elif mime_type.startswith('video'):
                 st.video(io.BytesIO(base64.b64decode(parts['data'])))
-def get_chat_history_str():
-    chat_history_str = "\n".join(
-        f"{entry['role'].title()}: {part['text']}" if 'text' in part
-        else f"{entry['role'].title()}: (File: {part.get('mime_type', '')})"
-        for entry in st.session_state['chat_history']
-        for part in entry['parts']
-    )
-    return chat_history_str
-# Send message function with TTS integration
 def send_message():
     user_input = st.session_state.user_input
     uploaded_files = st.session_state.uploaded_files
-    prompts = []
     prompt_parts = []
-    # Populate the prompts list with the existing chat history
-    for entry in st.session_state['chat_history']:
-        for part in entry['parts']:
-            if 'text' in part:
-                prompts.append(part['text'])
-            elif 'data' in part:
-                prompts.append(f"(File: {part.get('mime_type', '')})")
-                prompt_parts.append(part)  # Add the entire part
-    # Append the user input to the prompts list
     if user_input:
-        prompts.append(user_input)
-        st.session_state['chat_history'].append({"role": "user", "parts": [{"text": user_input}]})
         prompt_parts.append({"text": user_input})
     # Handle uploaded files
     if uploaded_files:
         for uploaded_file in uploaded_files:
             file_content = uploaded_file.read()
             base64_data = base64.b64encode(file_content).decode()
-            prompts.append(f"(File: {uploaded_file.type})")
-            prompt_parts.append({
-                "mime_type": uploaded_file.type,
-                "data": base64_data
-            })
-            st.session_state['chat_history'].append({
-                "role": "user",
-                "parts": [{"mime_type": uploaded_file.type, "data": base64_data}]
-            })
-    # Determine if vision model should be used
-    use_vision_model = any(part.get('mime_type') == 'image/jpeg' for part in prompt_parts)
-    # Use the selected model
-    model_name = selected_model
-    if use_vision_model and "pro" not in model_name:
-        st.warning(f"The selected model ({model_name}) does not support image inputs. Choose a 'pro' model for image capabilities.")
-        return
     model = genai.GenerativeModel(
-        model_name=model_name,
         generation_config=generation_config,
         safety_settings=safety_settings
     )
-    chat_history_str = "\n".join(prompts)
-    if use_vision_model:
-        # Include text and images for vision model
-        generated_prompt = {"role": "user", "parts": prompt_parts}
-    else:
-        # Include text only for standard model
-        generated_prompt = {"role": "user", "parts": [{"text": chat_history_str}]}
-    response = model.generate_content([generated_prompt])
     response_text = response.text if hasattr(response, "text") else "No response text found."
-    # After generating the response from the model, append it to the chat history
     if response_text:
         st.session_state['chat_history'].append({"role": "model", "parts": [{"text": response_text}]})
-        # Convert the response text to speech if enabled
         if enable_tts:
             tts = gTTS(text=response_text, lang='en')
             tts_file = BytesIO()
@@ -167,55 +123,42 @@ def send_message():
             tts_file.seek(0)
             st.audio(tts_file, format='audio/mp3')
-    # Clear the input fields after sending the message
     st.session_state.user_input = ''
     st.session_state.uploaded_files = []
     st.session_state.file_uploader_key = str(uuid.uuid4())
-    # Display the updated chat history
     display_chat_history()
-# User input text area
-user_input = st.text_area(
-    "Enter your message here:",
-    value="",
-    key="user_input"
-)
-# File uploader for images
 uploaded_files = st.file_uploader(
-    "Upload files:",
-    type=["png", "jpg", "jpeg", "mp4", "pdf"],  # Added mp4 and pdf
     accept_multiple_files=True,
     key=st.session_state.file_uploader_key
 )
-# Send message button
-send_button = st.button(
-    "Send",
-    on_click=send_message
-)
-# Clear conversation button
-clear_button = st.button("Clear Conversation", on_click=clear_conversation)
-# Function to download the chat history as a text file
-def download_chat_history():
-    chat_history_str = get_chat_history_str()
-    return chat_history_str
-# Download button for the chat history
-download_button = st.download_button(
-    label="Download Chat",
-    data=download_chat_history(),
-    file_name="chat_history.txt",
-    mime="text/plain"
-)
-# Ensure the file_uploader widget state is tied to the randomly generated key
 st.session_state.uploaded_files = uploaded_files
-# JavaScript to capture the Ctrl+Enter event and trigger a button click
 st.markdown(
     """
     <script>
@@ -230,4 +173,7 @@ st.markdown(
     </script>
     """,
     unsafe_allow_html=True
-)

 # Configure the generative AI model
 generation_config = genai.GenerationConfig(
     temperature=0.9,
+    max_output_tokens=3000
 )
 # Safety settings configuration
 if 'file_uploader_key' not in st.session_state:
     st.session_state['file_uploader_key'] = str(uuid.uuid4())
+# --- Streamlit UI ---
 st.title("Gemini Chatbot")
+st.write("Interact with the powerful Gemini 1.5 models.")
 # Model Selection Dropdown
+selected_model = st.selectbox("Choose a Gemini 1.5 Model:", ["gemini-1.5-flash-latest", "gemini-1.5-pro-latest"])
 # TTS Option Checkbox
 enable_tts = st.checkbox("Enable Text-to-Speech")
+# --- Helper Functions ---
 def get_image_base64(image):
     image = image.convert("RGB")
     buffered = io.BytesIO()
         role = entry["role"]
         parts = entry["parts"][0]
         if 'text' in parts:
+            st.markdown(f"**{role.title()}:** {parts['text']}")
         elif 'data' in parts:
             mime_type = parts.get('mime_type', '')
             if mime_type.startswith('image'):
+                st.image(Image.open(io.BytesIO(base64.b64decode(parts['data']))), caption='Uploaded Image', use_column_width=True)
             elif mime_type == 'application/pdf':
+                st.write("**PDF Content:**")
                 pdf_reader = PyPDF2.PdfReader(io.BytesIO(base64.b64decode(parts['data'])))
                 for page_num in range(len(pdf_reader.pages)):
                     page = pdf_reader.pages[page_num]
             elif mime_type.startswith('video'):
                 st.video(io.BytesIO(base64.b64decode(parts['data'])))
+# --- Send Message Function ---
 def send_message():
     user_input = st.session_state.user_input
     uploaded_files = st.session_state.uploaded_files
     prompt_parts = []
+    # Add user input to the prompt
     if user_input:
         prompt_parts.append({"text": user_input})
+        st.session_state['chat_history'].append({"role": "user", "parts": [{"text": user_input}]})
     # Handle uploaded files
     if uploaded_files:
         for uploaded_file in uploaded_files:
             file_content = uploaded_file.read()
             base64_data = base64.b64encode(file_content).decode()
+            prompt_parts.append({"mime_type": uploaded_file.type, "data": base64_data})
+            st.session_state['chat_history'].append({"role": "user", "parts": [{"mime_type": uploaded_file.type, "data": base64_data}]})
+    # Generate response using the selected model
     model = genai.GenerativeModel(
+        model_name=selected_model,
         generation_config=generation_config,
         safety_settings=safety_settings
     )
+    response = model.generate_content([{"role": "user", "parts": prompt_parts}])
     response_text = response.text if hasattr(response, "text") else "No response text found."
     if response_text:
         st.session_state['chat_history'].append({"role": "model", "parts": [{"text": response_text}]})
         if enable_tts:
             tts = gTTS(text=response_text, lang='en')
             tts_file = BytesIO()
             tts_file.seek(0)
             st.audio(tts_file, format='audio/mp3')
     st.session_state.user_input = ''
     st.session_state.uploaded_files = []
     st.session_state.file_uploader_key = str(uuid.uuid4())
     display_chat_history()
+# --- User Input Area ---
+col1, col2 = st.columns([3, 1])
+with col1:
+    user_input = st.text_area(
+        "Enter your message:",
+        value="",
+        key="user_input"
+    )
+with col2:
+    send_button = st.button(
+        "Send",
+        on_click=send_message,
+        type="primary" # Makes the Send button prominent
+    )
+# --- File Uploader ---
 uploaded_files = st.file_uploader(
+    "Upload Files (Images, Videos, PDFs):",
+    type=["png", "jpg", "jpeg", "mp4", "pdf"],
     accept_multiple_files=True,
     key=st.session_state.file_uploader_key
 )
+# --- Other Buttons ---
+st.button("Clear Conversation", on_click=clear_conversation)
+# --- Ensure file_uploader state ---
 st.session_state.uploaded_files = uploaded_files
+# --- JavaScript for Ctrl+Enter ---
 st.markdown(
     """
     <script>
     </script>
     """,
     unsafe_allow_html=True
+)
+# --- Display Chat History ---
+display_chat_history()