ziyadsuper2017 committed on
Commit
7fd1c6d
·
verified ·
1 Parent(s): 698caa4

trying to add audio recording feature

Browse files
Files changed (1) hide show
  1. app.py +47 -2
app.py CHANGED
@@ -7,6 +7,9 @@ from gtts import gTTS
7
  import google.generativeai as genai
8
  from io import BytesIO
9
  import PyPDF2
 
 
 
10
 
11
  # Set your API key
12
  api_key = "AIzaSyAHD0FwX-Ds6Y3eI-i5Oz7IdbJqR6rN7pg" # Replace with your actual API key
@@ -43,6 +46,10 @@ if 'chat_history' not in st.session_state:
43
  st.session_state['chat_history'] = []
44
  if 'file_uploader_key' not in st.session_state:
45
  st.session_state['file_uploader_key'] = str(uuid.uuid4())
 
 
 
 
46
 
47
  # --- Streamlit UI ---
48
  st.title("Gemini Chatbot")
@@ -62,6 +69,7 @@ def get_file_base64(file_content, mime_type):
62
  def clear_conversation():
63
  st.session_state['chat_history'] = []
64
  st.session_state['file_uploader_key'] = str(uuid.uuid4())
 
65
 
66
  def display_chat_history():
67
  chat_container = st.empty()
@@ -74,7 +82,7 @@ def display_chat_history():
74
  elif 'data' in parts:
75
  mime_type = parts.get('mime_type', '')
76
  if mime_type.startswith('image'):
77
- st.image(Image.open(io.BytesIO(base64.b64decode(parts['data']))),
78
  caption='Uploaded Image', use_column_width=True)
79
  elif mime_type == 'application/pdf':
80
  st.write("**PDF Content:**")
@@ -87,10 +95,28 @@ def display_chat_history():
87
  elif mime_type.startswith('video'):
88
  st.video(io.BytesIO(base64.b64decode(parts['data'])))
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  # --- Send Message Function ---
91
  def send_message():
92
  user_input = st.session_state.user_input
93
  uploaded_files = st.session_state.uploaded_files
 
94
  prompt_parts = []
95
 
96
  # Add user input to the prompt
@@ -107,6 +133,15 @@ def send_message():
107
  {"role": "user", "parts": [get_file_base64(file_content, uploaded_file.type)]}
108
  )
109
 
 
 
 
 
 
 
 
 
 
110
  # Generate response using the selected model
111
  try:
112
  model = genai.GenerativeModel(
@@ -128,7 +163,7 @@ def send_message():
128
  st.audio(tts_file, format='audio/mp3')
129
 
130
  except Exception as e:
131
- st.error(f"An error occurred: {e}")
132
 
133
  st.session_state.user_input = ''
134
  st.session_state.uploaded_files = []
@@ -161,6 +196,16 @@ uploaded_files = st.file_uploader(
161
  key=st.session_state.file_uploader_key
162
  )
163
 
 
 
 
 
 
 
 
 
 
 
164
  # --- Other Buttons ---
165
  st.button("Clear Conversation", on_click=clear_conversation)
166
 
 
7
  import google.generativeai as genai
8
  from io import BytesIO
9
  import PyPDF2
10
+ import soundfile as sf
11
+ import librosa
12
+ import numpy as np
13
 
14
  # Set your API key
15
  api_key = "AIzaSyAHD0FwX-Ds6Y3eI-i5Oz7IdbJqR6rN7pg" # Replace with your actual API key
 
46
  st.session_state['chat_history'] = []
47
  if 'file_uploader_key' not in st.session_state:
48
  st.session_state['file_uploader_key'] = str(uuid.uuid4())
49
+ if 'recording_enabled' not in st.session_state:
50
+ st.session_state['recording_enabled'] = False
51
+ if 'recorded_audio' not in st.session_state:
52
+ st.session_state['recorded_audio'] = None
53
 
54
  # --- Streamlit UI ---
55
  st.title("Gemini Chatbot")
 
69
  def clear_conversation():
70
  st.session_state['chat_history'] = []
71
  st.session_state['file_uploader_key'] = str(uuid.uuid4())
72
+ st.session_state['recorded_audio'] = None
73
 
74
  def display_chat_history():
75
  chat_container = st.empty()
 
82
  elif 'data' in parts:
83
  mime_type = parts.get('mime_type', '')
84
  if mime_type.startswith('image'):
85
+ st.image(Image.open(io.BytesIO(base64.b64decode(parts['data']))),
86
  caption='Uploaded Image', use_column_width=True)
87
  elif mime_type == 'application/pdf':
88
  st.write("**PDF Content:**")
 
95
  elif mime_type.startswith('video'):
96
  st.video(io.BytesIO(base64.b64decode(parts['data'])))
97
 
98
+ # --- Audio Recording Functions ---
99
def start_recording():
    """Mark the session as recording and show the in-progress notice."""
    state = st.session_state
    state.recording_enabled = True
    st.warning("Recording started. Click 'Stop Recording' to finish.")
102
+
103
def stop_recording():
    """Mark the session as no longer recording and confirm it to the user."""
    state = st.session_state
    state.recording_enabled = False
    st.success("Recording stopped.")
106
+
107
def process_audio(audio_data):
    """Decode recorded audio and re-encode it as WAV.

    Args:
        audio_data: The audio source handed to ``librosa.load``. Raw
            ``bytes``/``bytearray`` are wrapped in an in-memory stream
            first; anything else is passed through unchanged.

    Returns:
        A ``(wav_bytes, mime_type)`` tuple where ``mime_type`` is always
        ``"audio/wav"``.
    """
    import io

    # Raw bytes are not a readable source on their own, so give the decoder
    # a file-like view of them.
    if isinstance(audio_data, (bytes, bytearray)):
        audio_data = io.BytesIO(audio_data)

    # sr=None keeps the recording's native sample rate instead of resampling.
    samples, samplerate = librosa.load(audio_data, sr=None)

    # Encode into memory rather than a fixed "temp.wav" on disk: a shared
    # file name would be overwritten by concurrent sessions and was never
    # cleaned up afterwards.
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, samples, samplerate, format="WAV")
    return wav_buffer.getvalue(), "audio/wav"
114
+
115
  # --- Send Message Function ---
116
  def send_message():
117
  user_input = st.session_state.user_input
118
  uploaded_files = st.session_state.uploaded_files
119
+ recorded_audio = st.session_state.recorded_audio
120
  prompt_parts = []
121
 
122
  # Add user input to the prompt
 
133
  {"role": "user", "parts": [get_file_base64(file_content, uploaded_file.type)]}
134
  )
135
 
136
+ # Handle recorded audio
137
+ if recorded_audio:
138
+ audio_content, audio_type = process_audio(recorded_audio)
139
+ prompt_parts.append(get_file_base64(audio_content, audio_type))
140
+ st.session_state['chat_history'].append(
141
+ {"role": "user", "parts": [get_file_base64(audio_content, audio_type)]}
142
+ )
143
+ st.session_state['recorded_audio'] = None # Reset recorded audio
144
+
145
  # Generate response using the selected model
146
  try:
147
  model = genai.GenerativeModel(
 
163
  st.audio(tts_file, format='audio/mp3')
164
 
165
  except Exception as e:
166
+ st.error(f"An error occurred: {e}")
167
 
168
  st.session_state.user_input = ''
169
  st.session_state.uploaded_files = []
 
196
  key=st.session_state.file_uploader_key
197
  )
198
 
199
+ # --- Audio Recording ---
200
+ st.audio_recorder("Record audio:", key="recorded_audio")
201
+ col3, col4 = st.columns([1, 1])
202
+ with col3:
203
+ if st.button("Start Recording"):
204
+ start_recording()
205
+ with col4:
206
+ if st.button("Stop Recording"):
207
+ stop_recording()
208
+
209
  # --- Other Buttons ---
210
  st.button("Clear Conversation", on_click=clear_conversation)
211