Geministreamlitwithvision

Runtime error

App Files Files Community

ziyadsuper2017 committed on May 20, 2024

Commit

4479cfb

verified ·

1 Parent(s): a6a2d49

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -45

app.py CHANGED Viewed

@@ -7,9 +7,7 @@ from gtts import gTTS
 import google.generativeai as genai
 from io import BytesIO
 import PyPDF2
-import soundfile as sf
-import librosa
-import numpy as np
 # Set your API key
 api_key = "AIzaSyAHD0FwX-Ds6Y3eI-i5Oz7IdbJqR6rN7pg"  # Replace with your actual API key
@@ -46,10 +44,6 @@ if 'chat_history' not in st.session_state:
     st.session_state['chat_history'] = []
 if 'file_uploader_key' not in st.session_state:
     st.session_state['file_uploader_key'] = str(uuid.uuid4())
-if 'recording_enabled' not in st.session_state:
-    st.session_state['recording_enabled'] = False
-if 'recorded_audio' not in st.session_state:
-    st.session_state['recorded_audio'] = None
 # --- Streamlit UI ---
 st.title("Gemini Chatbot")
@@ -69,14 +63,13 @@ def get_file_base64(file_content, mime_type):
 def clear_conversation():
     st.session_state['chat_history'] = []
     st.session_state['file_uploader_key'] = str(uuid.uuid4())
-    st.session_state['recorded_audio'] = None
 def display_chat_history():
     chat_container = st.empty()
     with chat_container.container():
         for entry in st.session_state['chat_history']:
             role = entry["role"]
-            parts = entry["parts"][0]
             if 'text' in parts:
                 st.markdown(f"**{role.title()}:** {parts['text']}")
             elif 'data' in parts:
@@ -85,7 +78,7 @@ def display_chat_history():
                     st.image(Image.open(io.BytesIO(base64.b64decode(parts['data']))),
                              caption='Uploaded Image', use_column_width=True)
                 elif mime_type == 'application/pdf':
-                    st.write("**PDF Content:**")
                     pdf_reader = PyPDF2.PdfReader(io.BytesIO(base64.b64decode(parts['data'])))
                     for page_num in range(len(pdf_reader.pages)):
                         page = pdf_reader.pages[page_num]
@@ -95,28 +88,10 @@ def display_chat_history():
                 elif mime_type.startswith('video'):
                     st.video(io.BytesIO(base64.b64decode(parts['data'])))
-# --- Audio Recording Functions ---
-def start_recording():
-    st.session_state['recording_enabled'] = True
-    st.warning("Recording started. Click 'Stop Recording' to finish.")
-def stop_recording():
-    st.session_state['recording_enabled'] = False
-    st.success("Recording stopped.")
-def process_audio(audio_data):
-    # Convert to WAV format for compatibility
-    wav_data, samplerate = librosa.load(audio_data, sr=None)
-    sf.write("temp.wav", wav_data, samplerate, format="wav")
-    with open("temp.wav", "rb") as f:
-        wav_content = f.read()
-    return wav_content, "audio/wav"
 # --- Send Message Function ---
-def send_message():
     user_input = st.session_state.user_input
     uploaded_files = st.session_state.uploaded_files
-    recorded_audio = st.session_state.recorded_audio
     prompt_parts = []
     # Add user input to the prompt
@@ -133,14 +108,12 @@ def send_message():
                 {"role": "user", "parts": [get_file_base64(file_content, uploaded_file.type)]}
             )
-    # Handle recorded audio
-    if recorded_audio:
-        audio_content, audio_type = process_audio(recorded_audio)
-        prompt_parts.append(get_file_base64(audio_content, audio_type))
         st.session_state['chat_history'].append(
-            {"role": "user", "parts": [get_file_base64(audio_content, audio_type)]}
         )
-        st.session_state['recorded_audio'] = None  # Reset recorded audio
     # Generate response using the selected model
     try:
@@ -174,7 +147,6 @@ def send_message():
 # --- User Input Area ---
 col1, col2 = st.columns([3, 1])
 with col1:
     user_input = st.text_area(
         "Enter your message:",
@@ -196,15 +168,25 @@ uploaded_files = st.file_uploader(
     key=st.session_state.file_uploader_key
 )
-# --- Audio Recording ---
-st.audio_recorder("Record audio:", key="recorded_audio")
-col3, col4 = st.columns([1, 1])
-with col3:
-    if st.button("Start Recording"):
-        start_recording()
-with col4:
-    if st.button("Stop Recording"):
-        stop_recording()
 # --- Other Buttons ---
 st.button("Clear Conversation", on_click=clear_conversation)

 import google.generativeai as genai
 from io import BytesIO
 import PyPDF2
+from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
 # Set your API key
 api_key = "AIzaSyAHD0FwX-Ds6Y3eI-i5Oz7IdbJqR6rN7pg"  # Replace with your actual API key
     st.session_state['chat_history'] = []
 if 'file_uploader_key' not in st.session_state:
     st.session_state['file_uploader_key'] = str(uuid.uuid4())
 # --- Streamlit UI ---
 st.title("Gemini Chatbot")
 def clear_conversation():
     st.session_state['chat_history'] = []
     st.session_state['file_uploader_key'] = str(uuid.uuid4())
 def display_chat_history():
     chat_container = st.empty()
     with chat_container.container():
         for entry in st.session_state['chat_history']:
             role = entry["role"]
+            parts = entry["parts"][0]
             if 'text' in parts:
                 st.markdown(f"**{role.title()}:** {parts['text']}")
             elif 'data' in parts:
                     st.image(Image.open(io.BytesIO(base64.b64decode(parts['data']))),
                              caption='Uploaded Image', use_column_width=True)
                 elif mime_type == 'application/pdf':
+                    st.write("**PDF Content:**")
                     pdf_reader = PyPDF2.PdfReader(io.BytesIO(base64.b64decode(parts['data'])))
                     for page_num in range(len(pdf_reader.pages)):
                         page = pdf_reader.pages[page_num]
                 elif mime_type.startswith('video'):
                     st.video(io.BytesIO(base64.b64decode(parts['data'])))
 # --- Send Message Function ---
+def send_message(audio_data=None):
     user_input = st.session_state.user_input
     uploaded_files = st.session_state.uploaded_files
     prompt_parts = []
     # Add user input to the prompt
                 {"role": "user", "parts": [get_file_base64(file_content, uploaded_file.type)]}
             )
+    # Handle audio data from WebRTC
+    if audio_data:
+        prompt_parts.append(get_file_base64(audio_data, 'audio/wav'))
         st.session_state['chat_history'].append(
+            {"role": "user", "parts": [get_file_base64(audio_data, 'audio/wav')]}
         )
     # Generate response using the selected model
     try:
 # --- User Input Area ---
 col1, col2 = st.columns([3, 1])
 with col1:
     user_input = st.text_area(
         "Enter your message:",
     key=st.session_state.file_uploader_key
 )
+# --- WebRTC Audio Recording ---
+RTC_CONFIGURATION = RTCConfiguration({"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]})
+webrtc_ctx = webrtc_streamer(
+    key="audio-recorder",
+    mode=WebRtcMode.SENDONLY,
+    rtc_configuration=RTC_CONFIGURATION,
+    audio_receiver_size=256,
+    media_stream_constraints={"video": False, "audio": True},
+)
+if webrtc_ctx.audio_receiver:
+    st.write("Recording audio...")
+    audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=None)
+    audio_data = b"".join([frame for frame in audio_frames])
+    # Send the recorded audio when the "Send" button is clicked
+    if st.button("Send Recording"):
+        send_message(audio_data=audio_data)
 # --- Other Buttons ---
 st.button("Clear Conversation", on_click=clear_conversation)