Spaces:
Runtime error
Runtime error
trying to add audio recording feature
Browse files
app.py
CHANGED
@@ -7,6 +7,9 @@ from gtts import gTTS
|
|
7 |
import google.generativeai as genai
|
8 |
from io import BytesIO
|
9 |
import PyPDF2
|
|
|
|
|
|
|
10 |
|
11 |
# Set your API key
|
12 |
import os  # imported here so this statement stands alone; harmless if already imported
# Read the key from the environment (e.g. a Space secret) instead of committing
# it to the repository. The key that was previously hard-coded here was
# published and should be treated as compromised and rotated.
api_key = os.environ.get("GOOGLE_API_KEY", "")
|
@@ -43,6 +46,10 @@ if 'chat_history' not in st.session_state:
|
|
43 |
st.session_state['chat_history'] = []
|
44 |
if 'file_uploader_key' not in st.session_state:
|
45 |
st.session_state['file_uploader_key'] = str(uuid.uuid4())
|
|
|
|
|
|
|
|
|
46 |
|
47 |
# --- Streamlit UI ---
|
48 |
st.title("Gemini Chatbot")
|
@@ -62,6 +69,7 @@ def get_file_base64(file_content, mime_type):
|
|
62 |
def clear_conversation():
    """Reset the conversation state held in the Streamlit session.

    Empties the chat history and stores a freshly generated uploader key.
    The file uploader is keyed on that value, so a new key presumably
    yields a fresh, empty uploader on the next run.
    """
    session = st.session_state
    session['file_uploader_key'] = str(uuid.uuid4())
    session['chat_history'] = []
|
|
|
65 |
|
66 |
def display_chat_history():
|
67 |
chat_container = st.empty()
|
@@ -74,7 +82,7 @@ def display_chat_history():
|
|
74 |
elif 'data' in parts:
|
75 |
mime_type = parts.get('mime_type', '')
|
76 |
if mime_type.startswith('image'):
|
77 |
-
st.image(Image.open(io.BytesIO(base64.b64decode(parts['data']))),
|
78 |
caption='Uploaded Image', use_column_width=True)
|
79 |
elif mime_type == 'application/pdf':
|
80 |
st.write("**PDF Content:**")
|
@@ -87,10 +95,28 @@ def display_chat_history():
|
|
87 |
elif mime_type.startswith('video'):
|
88 |
st.video(io.BytesIO(base64.b64decode(parts['data'])))
|
89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
# --- Send Message Function ---
|
91 |
def send_message():
|
92 |
user_input = st.session_state.user_input
|
93 |
uploaded_files = st.session_state.uploaded_files
|
|
|
94 |
prompt_parts = []
|
95 |
|
96 |
# Add user input to the prompt
|
@@ -107,6 +133,15 @@ def send_message():
|
|
107 |
{"role": "user", "parts": [get_file_base64(file_content, uploaded_file.type)]}
|
108 |
)
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
# Generate response using the selected model
|
111 |
try:
|
112 |
model = genai.GenerativeModel(
|
@@ -128,7 +163,7 @@ def send_message():
|
|
128 |
st.audio(tts_file, format='audio/mp3')
|
129 |
|
130 |
except Exception as e:
|
131 |
-
st.error(f"An error occurred: {e}")
|
132 |
|
133 |
st.session_state.user_input = ''
|
134 |
st.session_state.uploaded_files = []
|
@@ -161,6 +196,16 @@ uploaded_files = st.file_uploader(
|
|
161 |
key=st.session_state.file_uploader_key
|
162 |
)
|
163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
# --- Other Buttons ---
|
165 |
st.button("Clear Conversation", on_click=clear_conversation)
|
166 |
|
|
|
7 |
import google.generativeai as genai
|
8 |
from io import BytesIO
|
9 |
import PyPDF2
|
10 |
+
import soundfile as sf
|
11 |
+
import librosa
|
12 |
+
import numpy as np
|
13 |
|
14 |
# Set your API key
|
15 |
import os  # imported here so this statement stands alone; harmless if already imported
# Read the key from the environment (e.g. a Space secret) instead of committing
# it to the repository. The key that was previously hard-coded here was
# published and should be treated as compromised and rotated.
api_key = os.environ.get("GOOGLE_API_KEY", "")
|
|
|
46 |
st.session_state['chat_history'] = []
|
47 |
# Seed per-session defaults; a key that already exists is left untouched.
if 'file_uploader_key' not in st.session_state:
    st.session_state.file_uploader_key = str(uuid.uuid4())
if 'recording_enabled' not in st.session_state:
    st.session_state.recording_enabled = False
if 'recorded_audio' not in st.session_state:
    st.session_state.recorded_audio = None
|
53 |
|
54 |
# --- Streamlit UI ---
|
55 |
st.title("Gemini Chatbot")
|
|
|
69 |
def clear_conversation():
    """Reset the conversation state held in the Streamlit session.

    Empties the chat history, discards any pending recording, and stores a
    freshly generated uploader key. The file uploader is keyed on that
    value, so a new key presumably yields a fresh, empty uploader on the
    next run.
    """
    session = st.session_state
    session['recorded_audio'] = None
    session['file_uploader_key'] = str(uuid.uuid4())
    session['chat_history'] = []
|
73 |
|
74 |
def display_chat_history():
|
75 |
chat_container = st.empty()
|
|
|
82 |
elif 'data' in parts:
|
83 |
mime_type = parts.get('mime_type', '')
|
84 |
if mime_type.startswith('image'):
|
85 |
+
st.image(Image.open(io.BytesIO(base64.b64decode(parts['data']))),
|
86 |
caption='Uploaded Image', use_column_width=True)
|
87 |
elif mime_type == 'application/pdf':
|
88 |
st.write("**PDF Content:**")
|
|
|
95 |
elif mime_type.startswith('video'):
|
96 |
st.video(io.BytesIO(base64.b64decode(parts['data'])))
|
97 |
|
98 |
+
# --- Audio Recording Functions ---
|
99 |
+
def start_recording():
    """Mark the session as recording and show the 'recording started' notice."""
    st.session_state.recording_enabled = True
    st.warning("Recording started. Click 'Stop Recording' to finish.")
|
102 |
+
|
103 |
+
def stop_recording():
    """Clear the session's recording flag and show the 'recording stopped' notice."""
    st.session_state.recording_enabled = False
    st.success("Recording stopped.")
|
106 |
+
|
107 |
+
def process_audio(audio_data):
    """Decode a recording and re-encode it as WAV bytes.

    Args:
        audio_data: Whatever ``librosa.load`` accepts as its source
            (presumably a path or a file-like object with encoded audio).

    Returns:
        tuple: ``(wav_bytes, "audio/wav")`` - the encoded WAV content and
        its MIME type.
    """
    import io  # local import keeps this function self-contained

    # sr=None keeps the recording's own sample rate instead of resampling.
    samples, sample_rate = librosa.load(audio_data, sr=None)

    # Encode into an in-memory buffer rather than a fixed "temp.wav" on disk:
    # a shared file name lets concurrent sessions overwrite each other's
    # audio, and the file was never removed afterwards.
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, samples, sample_rate, format="wav")
    return wav_buffer.getvalue(), "audio/wav"
|
114 |
+
|
115 |
# --- Send Message Function ---
|
116 |
def send_message():
|
117 |
user_input = st.session_state.user_input
|
118 |
uploaded_files = st.session_state.uploaded_files
|
119 |
+
recorded_audio = st.session_state.recorded_audio
|
120 |
prompt_parts = []
|
121 |
|
122 |
# Add user input to the prompt
|
|
|
133 |
{"role": "user", "parts": [get_file_base64(file_content, uploaded_file.type)]}
|
134 |
)
|
135 |
|
136 |
+
# Handle recorded audio
|
137 |
+
if recorded_audio:
|
138 |
+
audio_content, audio_type = process_audio(recorded_audio)
|
139 |
+
prompt_parts.append(get_file_base64(audio_content, audio_type))
|
140 |
+
st.session_state['chat_history'].append(
|
141 |
+
{"role": "user", "parts": [get_file_base64(audio_content, audio_type)]}
|
142 |
+
)
|
143 |
+
st.session_state['recorded_audio'] = None # Reset recorded audio
|
144 |
+
|
145 |
# Generate response using the selected model
|
146 |
try:
|
147 |
model = genai.GenerativeModel(
|
|
|
163 |
st.audio(tts_file, format='audio/mp3')
|
164 |
|
165 |
except Exception as e:
|
166 |
+
st.error(f"An error occurred: {e}")
|
167 |
|
168 |
st.session_state.user_input = ''
|
169 |
st.session_state.uploaded_files = []
|
|
|
196 |
key=st.session_state.file_uploader_key
|
197 |
)
|
198 |
|
199 |
+
# --- Audio Recording ---
# Streamlit has no `st.audio_recorder`; calling it raises AttributeError as
# soon as the script reaches this line. The built-in microphone widget is
# `st.audio_input`, available only in newer Streamlit releases.
if hasattr(st, "audio_input"):
    # The widget gets its own key: 'recorded_audio' is assigned through
    # session state elsewhere in this script, and Streamlit rejects such
    # writes to the key of an upload-style widget.
    recorded_clip = st.audio_input("Record audio:", key="audio_input_widget")
    if recorded_clip is not None:
        clip_id = getattr(recorded_clip, "file_id", None)
        # The widget keeps returning the same clip on every rerun, so hand
        # each recording over only once; otherwise it would be queued again
        # after 'recorded_audio' has been reset.
        if clip_id is None or clip_id != st.session_state.get("last_recorded_audio_id"):
            st.session_state["recorded_audio"] = recorded_clip
            st.session_state["last_recorded_audio_id"] = clip_id
else:
    st.info("Audio recording needs a Streamlit release that provides st.audio_input.")
col3, col4 = st.columns([1, 1])
with col3:
    if st.button("Start Recording"):
        start_recording()
with col4:
    if st.button("Stop Recording"):
        stop_recording()
|
208 |
+
|
209 |
# --- Other Buttons ---
|
210 |
st.button("Clear Conversation", on_click=clear_conversation)
|
211 |
|