Geministreamlitwithvision

Runtime error

App Files Community

ziyadsuper2017 committed on May 20, 2024

Commit

7fd1c6d

verified ·

1 Parent(s): 698caa4

trying to add audio recording feature

Browse files

Files changed (1) hide show

app.py +47 -2

app.py CHANGED Viewed

@@ -7,6 +7,9 @@ from gtts import gTTS
 import google.generativeai as genai
 from io import BytesIO
 import PyPDF2
 # Set your API key
 api_key = "AIzaSyAHD0FwX-Ds6Y3eI-i5Oz7IdbJqR6rN7pg"  # Replace with your actual API key
@@ -43,6 +46,10 @@ if 'chat_history' not in st.session_state:
     st.session_state['chat_history'] = []
 if 'file_uploader_key' not in st.session_state:
     st.session_state['file_uploader_key'] = str(uuid.uuid4())
 # --- Streamlit UI ---
 st.title("Gemini Chatbot")
@@ -62,6 +69,7 @@ def get_file_base64(file_content, mime_type):
 def clear_conversation():
     st.session_state['chat_history'] = []
     st.session_state['file_uploader_key'] = str(uuid.uuid4())
 def display_chat_history():
     chat_container = st.empty()
@@ -74,7 +82,7 @@ def display_chat_history():
             elif 'data' in parts:
                 mime_type = parts.get('mime_type', '')
                 if mime_type.startswith('image'):
-                    st.image(Image.open(io.BytesIO(base64.b64decode(parts['data']))),
                              caption='Uploaded Image', use_column_width=True)
                 elif mime_type == 'application/pdf':
                     st.write("**PDF Content:**")
@@ -87,10 +95,28 @@ def display_chat_history():
                 elif mime_type.startswith('video'):
                     st.video(io.BytesIO(base64.b64decode(parts['data'])))
 # --- Send Message Function ---
 def send_message():
     user_input = st.session_state.user_input
     uploaded_files = st.session_state.uploaded_files
     prompt_parts = []
     # Add user input to the prompt
@@ -107,6 +133,15 @@ def send_message():
                 {"role": "user", "parts": [get_file_base64(file_content, uploaded_file.type)]}
             )
     # Generate response using the selected model
     try:
         model = genai.GenerativeModel(
@@ -128,7 +163,7 @@ def send_message():
                 st.audio(tts_file, format='audio/mp3')
     except Exception as e:
-        st.error(f"An error occurred: {e}")
     st.session_state.user_input = ''
     st.session_state.uploaded_files = []
@@ -161,6 +196,16 @@ uploaded_files = st.file_uploader(
     key=st.session_state.file_uploader_key
 )
 # --- Other Buttons ---
 st.button("Clear Conversation", on_click=clear_conversation)

 import google.generativeai as genai
 from io import BytesIO
 import PyPDF2
+import soundfile as sf
+import librosa
+import numpy as np
 # Set your API key
 api_key = "AIzaSyAHD0FwX-Ds6Y3eI-i5Oz7IdbJqR6rN7pg"  # Replace with your actual API key
     st.session_state['chat_history'] = []
 if 'file_uploader_key' not in st.session_state:
     st.session_state['file_uploader_key'] = str(uuid.uuid4())
+if 'recording_enabled' not in st.session_state:
+    st.session_state['recording_enabled'] = False
+if 'recorded_audio' not in st.session_state:
+    st.session_state['recorded_audio'] = None
 # --- Streamlit UI ---
 st.title("Gemini Chatbot")
 def clear_conversation():
     st.session_state['chat_history'] = []
     st.session_state['file_uploader_key'] = str(uuid.uuid4())
+    st.session_state['recorded_audio'] = None
 def display_chat_history():
     chat_container = st.empty()
             elif 'data' in parts:
                 mime_type = parts.get('mime_type', '')
                 if mime_type.startswith('image'):
+                    st.image(Image.open(io.BytesIO(base64.b64decode(parts['data']))),
                              caption='Uploaded Image', use_column_width=True)
                 elif mime_type == 'application/pdf':
                     st.write("**PDF Content:**")
                 elif mime_type.startswith('video'):
                     st.video(io.BytesIO(base64.b64decode(parts['data'])))
+# --- Audio Recording Functions ---
+def start_recording():
+    st.session_state['recording_enabled'] = True
+    st.warning("Recording started. Click 'Stop Recording' to finish.")
+def stop_recording():
+    st.session_state['recording_enabled'] = False
+    st.success("Recording stopped.")
+def process_audio(audio_data):
+    # Convert to WAV format for compatibility
+    wav_data, samplerate = librosa.load(audio_data, sr=None)
+    sf.write("temp.wav", wav_data, samplerate, format="wav")
+    with open("temp.wav", "rb") as f:
+        wav_content = f.read()
+    return wav_content, "audio/wav"
 # --- Send Message Function ---
 def send_message():
     user_input = st.session_state.user_input
     uploaded_files = st.session_state.uploaded_files
+    recorded_audio = st.session_state.recorded_audio
     prompt_parts = []
     # Add user input to the prompt
                 {"role": "user", "parts": [get_file_base64(file_content, uploaded_file.type)]}
             )
+    # Handle recorded audio
+    if recorded_audio:
+        audio_content, audio_type = process_audio(recorded_audio)
+        prompt_parts.append(get_file_base64(audio_content, audio_type))
+        st.session_state['chat_history'].append(
+            {"role": "user", "parts": [get_file_base64(audio_content, audio_type)]}
+        )
+        st.session_state['recorded_audio'] = None  # Reset recorded audio
     # Generate response using the selected model
     try:
         model = genai.GenerativeModel(
                 st.audio(tts_file, format='audio/mp3')
     except Exception as e:
+        st.error(f"An error occurred: {e}")
     st.session_state.user_input = ''
     st.session_state.uploaded_files = []
     key=st.session_state.file_uploader_key
 )
+# --- Audio Recording ---
+st.audio_recorder("Record audio:", key="recorded_audio")
+col3, col4 = st.columns([1, 1])
+with col3:
+    if st.button("Start Recording"):
+        start_recording()
+with col4:
+    if st.button("Stop Recording"):
+        stop_recording()
 # --- Other Buttons ---
 st.button("Clear Conversation", on_click=clear_conversation)