Spaces:
Runtime error
Runtime error
trying to add audio recording feature
Browse files
app.py
CHANGED
|
@@ -7,6 +7,9 @@ from gtts import gTTS
|
|
| 7 |
import google.generativeai as genai
|
| 8 |
from io import BytesIO
|
| 9 |
import PyPDF2
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# Set your API key
# The key is read from the environment (e.g. a Space secret named
# GOOGLE_API_KEY) instead of being hard-coded: a literal key committed to a
# public repository is leaked and has to be rotated.
import os

api_key = os.environ.get("GOOGLE_API_KEY", "")
|
|
@@ -43,6 +46,10 @@ if 'chat_history' not in st.session_state:
|
|
| 43 |
st.session_state['chat_history'] = []
|
| 44 |
if 'file_uploader_key' not in st.session_state:
|
| 45 |
st.session_state['file_uploader_key'] = str(uuid.uuid4())
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
# --- Streamlit UI ---
|
| 48 |
st.title("Gemini Chatbot")
|
|
@@ -62,6 +69,7 @@ def get_file_base64(file_content, mime_type):
|
|
| 62 |
def clear_conversation():
    """Empty the chat history and give the file uploader a fresh widget key."""
    state = st.session_state
    # A new random key makes the uploader widget start over with no files.
    state['file_uploader_key'] = str(uuid.uuid4())
    state['chat_history'] = []
|
|
|
|
| 65 |
|
| 66 |
def display_chat_history():
|
| 67 |
chat_container = st.empty()
|
|
@@ -74,7 +82,7 @@ def display_chat_history():
|
|
| 74 |
elif 'data' in parts:
|
| 75 |
mime_type = parts.get('mime_type', '')
|
| 76 |
if mime_type.startswith('image'):
|
| 77 |
-
st.image(Image.open(io.BytesIO(base64.b64decode(parts['data']))),
|
| 78 |
caption='Uploaded Image', use_column_width=True)
|
| 79 |
elif mime_type == 'application/pdf':
|
| 80 |
st.write("**PDF Content:**")
|
|
@@ -87,10 +95,28 @@ def display_chat_history():
|
|
| 87 |
elif mime_type.startswith('video'):
|
| 88 |
st.video(io.BytesIO(base64.b64decode(parts['data'])))
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
# --- Send Message Function ---
|
| 91 |
def send_message():
|
| 92 |
user_input = st.session_state.user_input
|
| 93 |
uploaded_files = st.session_state.uploaded_files
|
|
|
|
| 94 |
prompt_parts = []
|
| 95 |
|
| 96 |
# Add user input to the prompt
|
|
@@ -107,6 +133,15 @@ def send_message():
|
|
| 107 |
{"role": "user", "parts": [get_file_base64(file_content, uploaded_file.type)]}
|
| 108 |
)
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
# Generate response using the selected model
|
| 111 |
try:
|
| 112 |
model = genai.GenerativeModel(
|
|
@@ -128,7 +163,7 @@ def send_message():
|
|
| 128 |
st.audio(tts_file, format='audio/mp3')
|
| 129 |
|
| 130 |
except Exception as e:
|
| 131 |
-
st.error(f"An error occurred: {e}")
|
| 132 |
|
| 133 |
st.session_state.user_input = ''
|
| 134 |
st.session_state.uploaded_files = []
|
|
@@ -161,6 +196,16 @@ uploaded_files = st.file_uploader(
|
|
| 161 |
key=st.session_state.file_uploader_key
|
| 162 |
)
|
| 163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
# --- Other Buttons ---
|
| 165 |
st.button("Clear Conversation", on_click=clear_conversation)
|
| 166 |
|
|
|
|
| 7 |
import google.generativeai as genai
|
| 8 |
from io import BytesIO
|
| 9 |
import PyPDF2
|
| 10 |
+
import soundfile as sf
|
| 11 |
+
import librosa
|
| 12 |
+
import numpy as np
|
| 13 |
|
| 14 |
# Set your API key
# The key is read from the environment (e.g. a Space secret named
# GOOGLE_API_KEY) instead of being hard-coded: a literal key committed to a
# public repository is leaked and has to be rotated.
import os

api_key = os.environ.get("GOOGLE_API_KEY", "")
|
|
|
|
| 46 |
st.session_state['chat_history'] = []
|
| 47 |
if 'file_uploader_key' not in st.session_state:
|
| 48 |
st.session_state['file_uploader_key'] = str(uuid.uuid4())
|
| 49 |
+
if 'recording_enabled' not in st.session_state:
|
| 50 |
+
st.session_state['recording_enabled'] = False
|
| 51 |
+
if 'recorded_audio' not in st.session_state:
|
| 52 |
+
st.session_state['recorded_audio'] = None
|
| 53 |
|
| 54 |
# --- Streamlit UI ---
|
| 55 |
st.title("Gemini Chatbot")
|
|
|
|
| 69 |
def clear_conversation():
    """Reset the chat: drop history, any recorded audio, and the uploader key."""
    state = st.session_state
    state['recorded_audio'] = None
    # A new random key makes the uploader widget start over with no files.
    state['file_uploader_key'] = str(uuid.uuid4())
    state['chat_history'] = []
|
| 73 |
|
| 74 |
def display_chat_history():
|
| 75 |
chat_container = st.empty()
|
|
|
|
| 82 |
elif 'data' in parts:
|
| 83 |
mime_type = parts.get('mime_type', '')
|
| 84 |
if mime_type.startswith('image'):
|
| 85 |
+
st.image(Image.open(io.BytesIO(base64.b64decode(parts['data']))),
|
| 86 |
caption='Uploaded Image', use_column_width=True)
|
| 87 |
elif mime_type == 'application/pdf':
|
| 88 |
st.write("**PDF Content:**")
|
|
|
|
| 95 |
elif mime_type.startswith('video'):
|
| 96 |
st.video(io.BytesIO(base64.b64decode(parts['data'])))
|
| 97 |
|
| 98 |
+
# --- Audio Recording Functions ---
|
| 99 |
+
def start_recording():
    """Flag the session as recording and show a notice to the user."""
    st.session_state.recording_enabled = True
    st.warning("Recording started. Click 'Stop Recording' to finish.")
|
| 102 |
+
|
| 103 |
+
def stop_recording():
    """Clear the session's recording flag and confirm it to the user."""
    st.session_state.recording_enabled = False
    st.success("Recording stopped.")
|
| 106 |
+
|
| 107 |
+
def process_audio(audio_data):
    """Re-encode recorded audio as WAV and return it as bytes.

    Args:
        audio_data: The recording to convert. Raw ``bytes`` are wrapped in a
            file-like object; anything else is handed to ``librosa.load``
            unchanged (a path or file-like object).

    Returns:
        A ``(wav_bytes, mime_type)`` tuple, with ``mime_type`` always
        ``"audio/wav"``.
    """
    # Raw bytes are not a path, so give librosa a file-like object instead.
    if isinstance(audio_data, (bytes, bytearray)):
        audio_data = BytesIO(audio_data)

    # sr=None keeps the recording's own sample rate instead of resampling.
    samples, samplerate = librosa.load(audio_data, sr=None)

    # Encode in memory: a fixed "temp.wav" on disk would be shared by every
    # concurrent session and would never be cleaned up.
    wav_buffer = BytesIO()
    sf.write(wav_buffer, samples, samplerate, format="WAV")
    return wav_buffer.getvalue(), "audio/wav"
|
| 114 |
+
|
| 115 |
# --- Send Message Function ---
|
| 116 |
def send_message():
|
| 117 |
user_input = st.session_state.user_input
|
| 118 |
uploaded_files = st.session_state.uploaded_files
|
| 119 |
+
recorded_audio = st.session_state.recorded_audio
|
| 120 |
prompt_parts = []
|
| 121 |
|
| 122 |
# Add user input to the prompt
|
|
|
|
| 133 |
{"role": "user", "parts": [get_file_base64(file_content, uploaded_file.type)]}
|
| 134 |
)
|
| 135 |
|
| 136 |
+
# Handle recorded audio
|
| 137 |
+
if recorded_audio:
|
| 138 |
+
audio_content, audio_type = process_audio(recorded_audio)
|
| 139 |
+
prompt_parts.append(get_file_base64(audio_content, audio_type))
|
| 140 |
+
st.session_state['chat_history'].append(
|
| 141 |
+
{"role": "user", "parts": [get_file_base64(audio_content, audio_type)]}
|
| 142 |
+
)
|
| 143 |
+
st.session_state['recorded_audio'] = None # Reset recorded audio
|
| 144 |
+
|
| 145 |
# Generate response using the selected model
|
| 146 |
try:
|
| 147 |
model = genai.GenerativeModel(
|
|
|
|
| 163 |
st.audio(tts_file, format='audio/mp3')
|
| 164 |
|
| 165 |
except Exception as e:
|
| 166 |
+
st.error(f"An error occurred: {e}")
|
| 167 |
|
| 168 |
st.session_state.user_input = ''
|
| 169 |
st.session_state.uploaded_files = []
|
|
|
|
| 196 |
key=st.session_state.file_uploader_key
|
| 197 |
)
|
| 198 |
|
| 199 |
+
# --- Audio Recording ---
# Renders the recorder widget, then Start/Stop buttons in two equal-width
# columns. The buttons only call start_recording()/stop_recording(), which
# flip the 'recording_enabled' session flag and show a status message.
# NOTE(review): `st.audio_recorder` does not appear to be part of Streamlit's
# public API (the built-in recorder is `st.audio_input`) — confirm; if the
# attribute is missing, the next statement raises AttributeError when the
# script runs.
# NOTE(review): the widget key "recorded_audio" is the same session-state key
# that send_message() and clear_conversation() assign None to — verify the
# widget allows its key to be set that way.
|
| 200 |
+
st.audio_recorder("Record audio:", key="recorded_audio")
|
| 201 |
+
col3, col4 = st.columns([1, 1])
|
| 202 |
+
with col3:
|
| 203 |
+
if st.button("Start Recording"):
|
| 204 |
+
start_recording()
|
| 205 |
+
with col4:
|
| 206 |
+
if st.button("Stop Recording"):
|
| 207 |
+
stop_recording()
|
| 208 |
+
|
| 209 |
# --- Other Buttons ---
|
| 210 |
st.button("Clear Conversation", on_click=clear_conversation)
|
| 211 |
|