ziyadsuper2017 committed on
Commit
4479cfb
·
verified ·
1 Parent(s): a6a2d49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -45
app.py CHANGED
@@ -7,9 +7,7 @@ from gtts import gTTS
7
  import google.generativeai as genai
8
  from io import BytesIO
9
  import PyPDF2
10
- import soundfile as sf
11
- import librosa
12
- import numpy as np
13
 
14
  # Set your API key
15
  api_key = "AIzaSyAHD0FwX-Ds6Y3eI-i5Oz7IdbJqR6rN7pg" # Replace with your actual API key
@@ -46,10 +44,6 @@ if 'chat_history' not in st.session_state:
46
  st.session_state['chat_history'] = []
47
  if 'file_uploader_key' not in st.session_state:
48
  st.session_state['file_uploader_key'] = str(uuid.uuid4())
49
- if 'recording_enabled' not in st.session_state:
50
- st.session_state['recording_enabled'] = False
51
- if 'recorded_audio' not in st.session_state:
52
- st.session_state['recorded_audio'] = None
53
 
54
  # --- Streamlit UI ---
55
  st.title("Gemini Chatbot")
@@ -69,14 +63,13 @@ def get_file_base64(file_content, mime_type):
69
  def clear_conversation():
70
  st.session_state['chat_history'] = []
71
  st.session_state['file_uploader_key'] = str(uuid.uuid4())
72
- st.session_state['recorded_audio'] = None
73
 
74
  def display_chat_history():
75
  chat_container = st.empty()
76
  with chat_container.container():
77
  for entry in st.session_state['chat_history']:
78
  role = entry["role"]
79
- parts = entry["parts"][0]
80
  if 'text' in parts:
81
  st.markdown(f"**{role.title()}:** {parts['text']}")
82
  elif 'data' in parts:
@@ -85,7 +78,7 @@ def display_chat_history():
85
  st.image(Image.open(io.BytesIO(base64.b64decode(parts['data']))),
86
  caption='Uploaded Image', use_column_width=True)
87
  elif mime_type == 'application/pdf':
88
- st.write("**PDF Content:**")
89
  pdf_reader = PyPDF2.PdfReader(io.BytesIO(base64.b64decode(parts['data'])))
90
  for page_num in range(len(pdf_reader.pages)):
91
  page = pdf_reader.pages[page_num]
@@ -95,28 +88,10 @@ def display_chat_history():
95
  elif mime_type.startswith('video'):
96
  st.video(io.BytesIO(base64.b64decode(parts['data'])))
97
 
98
- # --- Audio Recording Functions ---
99
- def start_recording():
100
- st.session_state['recording_enabled'] = True
101
- st.warning("Recording started. Click 'Stop Recording' to finish.")
102
-
103
- def stop_recording():
104
- st.session_state['recording_enabled'] = False
105
- st.success("Recording stopped.")
106
-
107
- def process_audio(audio_data):
108
- # Convert to WAV format for compatibility
109
- wav_data, samplerate = librosa.load(audio_data, sr=None)
110
- sf.write("temp.wav", wav_data, samplerate, format="wav")
111
- with open("temp.wav", "rb") as f:
112
- wav_content = f.read()
113
- return wav_content, "audio/wav"
114
-
115
  # --- Send Message Function ---
116
- def send_message():
117
  user_input = st.session_state.user_input
118
  uploaded_files = st.session_state.uploaded_files
119
- recorded_audio = st.session_state.recorded_audio
120
  prompt_parts = []
121
 
122
  # Add user input to the prompt
@@ -133,14 +108,12 @@ def send_message():
133
  {"role": "user", "parts": [get_file_base64(file_content, uploaded_file.type)]}
134
  )
135
 
136
- # Handle recorded audio
137
- if recorded_audio:
138
- audio_content, audio_type = process_audio(recorded_audio)
139
- prompt_parts.append(get_file_base64(audio_content, audio_type))
140
  st.session_state['chat_history'].append(
141
- {"role": "user", "parts": [get_file_base64(audio_content, audio_type)]}
142
  )
143
- st.session_state['recorded_audio'] = None # Reset recorded audio
144
 
145
  # Generate response using the selected model
146
  try:
@@ -174,7 +147,6 @@ def send_message():
174
 
175
  # --- User Input Area ---
176
  col1, col2 = st.columns([3, 1])
177
-
178
  with col1:
179
  user_input = st.text_area(
180
  "Enter your message:",
@@ -196,15 +168,25 @@ uploaded_files = st.file_uploader(
196
  key=st.session_state.file_uploader_key
197
  )
198
 
199
- # --- Audio Recording ---
200
- st.audio_recorder("Record audio:", key="recorded_audio")
201
- col3, col4 = st.columns([1, 1])
202
- with col3:
203
- if st.button("Start Recording"):
204
- start_recording()
205
- with col4:
206
- if st.button("Stop Recording"):
207
- stop_recording()
 
 
 
 
 
 
 
 
 
 
208
 
209
  # --- Other Buttons ---
210
  st.button("Clear Conversation", on_click=clear_conversation)
 
7
  import google.generativeai as genai
8
  from io import BytesIO
9
  import PyPDF2
10
+ from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
 
 
11
 
12
  # Set your API key
13
  api_key = "AIzaSyAHD0FwX-Ds6Y3eI-i5Oz7IdbJqR6rN7pg" # Replace with your actual API key
 
44
  st.session_state['chat_history'] = []
45
  if 'file_uploader_key' not in st.session_state:
46
  st.session_state['file_uploader_key'] = str(uuid.uuid4())
 
 
 
 
47
 
48
  # --- Streamlit UI ---
49
  st.title("Gemini Chatbot")
 
63
  def clear_conversation():
64
  st.session_state['chat_history'] = []
65
  st.session_state['file_uploader_key'] = str(uuid.uuid4())
 
66
 
67
  def display_chat_history():
68
  chat_container = st.empty()
69
  with chat_container.container():
70
  for entry in st.session_state['chat_history']:
71
  role = entry["role"]
72
+ parts = entry["parts"][0]
73
  if 'text' in parts:
74
  st.markdown(f"**{role.title()}:** {parts['text']}")
75
  elif 'data' in parts:
 
78
  st.image(Image.open(io.BytesIO(base64.b64decode(parts['data']))),
79
  caption='Uploaded Image', use_column_width=True)
80
  elif mime_type == 'application/pdf':
81
+ st.write("**PDF Content:**")
82
  pdf_reader = PyPDF2.PdfReader(io.BytesIO(base64.b64decode(parts['data'])))
83
  for page_num in range(len(pdf_reader.pages)):
84
  page = pdf_reader.pages[page_num]
 
88
  elif mime_type.startswith('video'):
89
  st.video(io.BytesIO(base64.b64decode(parts['data'])))
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  # --- Send Message Function ---
92
+ def send_message(audio_data=None):
93
  user_input = st.session_state.user_input
94
  uploaded_files = st.session_state.uploaded_files
 
95
  prompt_parts = []
96
 
97
  # Add user input to the prompt
 
108
  {"role": "user", "parts": [get_file_base64(file_content, uploaded_file.type)]}
109
  )
110
 
111
+ # Handle audio data from WebRTC
112
+ if audio_data:
113
+ prompt_parts.append(get_file_base64(audio_data, 'audio/wav'))
 
114
  st.session_state['chat_history'].append(
115
+ {"role": "user", "parts": [get_file_base64(audio_data, 'audio/wav')]}
116
  )
 
117
 
118
  # Generate response using the selected model
119
  try:
 
147
 
148
  # --- User Input Area ---
149
  col1, col2 = st.columns([3, 1])
 
150
  with col1:
151
  user_input = st.text_area(
152
  "Enter your message:",
 
168
  key=st.session_state.file_uploader_key
169
  )
170
 
171
+ # --- WebRTC Audio Recording ---
172
+ RTC_CONFIGURATION = RTCConfiguration({"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]})
173
+
174
+ webrtc_ctx = webrtc_streamer(
175
+ key="audio-recorder",
176
+ mode=WebRtcMode.SENDONLY,
177
+ rtc_configuration=RTC_CONFIGURATION,
178
+ audio_receiver_size=256,
179
+ media_stream_constraints={"video": False, "audio": True},
180
+ )
181
+
182
+ if webrtc_ctx.audio_receiver:
183
+ st.write("Recording audio...")
184
+ audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=None)
185
+ audio_data = b"".join([frame for frame in audio_frames])
186
+
187
+ # Send the recorded audio when the "Send" button is clicked
188
+ if st.button("Send Recording"):
189
+ send_message(audio_data=audio_data)
190
 
191
  # --- Other Buttons ---
192
  st.button("Clear Conversation", on_click=clear_conversation)