Twelve2five committed on
Commit
01f7ec4
·
verified ·
1 Parent(s): 797af4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -67
app.py CHANGED
@@ -55,35 +55,35 @@ class DeepSeekAPI:
55
 
56
  deepseek_client = DeepSeekAPI(api_key=os.getenv("DEEPSEEK_API_KEY"))
57
 
58
- # Define handler function for FastRTC Stream
59
  def response(
60
  audio: tuple[int, np.ndarray],
61
- chatbot=None,
62
  ):
63
- # Initialize chatbot if None
64
  chatbot = chatbot or []
65
- messages = [{"role": msg[0], "content": msg[1]} for msg in chatbot] if chatbot else []
66
 
67
  # Convert speech to text
68
  text = stt_model.stt(audio)
69
  logger.info(f"User said: {text}")
70
 
71
  # Add user message to chat
72
- chatbot.append(("user", text))
73
  yield AdditionalOutputs(chatbot)
74
 
75
  # Get AI response
76
- formatted_messages = []
77
- for role, content in chatbot:
78
- formatted_messages.append({"role": "user" if role == "user" else "assistant", "content": content})
 
 
79
 
80
  # Call DeepSeek API
81
- response_data = deepseek_client.chat_completion(formatted_messages)
82
  response_text = response_data["choices"][0]["message"]["content"]
83
  logger.info(f"DeepSeek response: {response_text[:50]}...")
84
 
85
- # Add AI response to chat
86
- chatbot.append(("assistant", response_text))
 
87
 
88
  # Convert response to speech
89
  if os.getenv("ELEVENLABS_API_KEY"):
@@ -108,8 +108,6 @@ def response(
108
  # Fall back to gTTS
109
  logger.info("ElevenLabs API key not found, using gTTS...")
110
  yield from use_gtts_for_text(response_text)
111
-
112
- yield AdditionalOutputs(chatbot)
113
 
114
  def use_gtts_for_text(text):
115
  """Helper function to generate speech with gTTS for the entire text"""
@@ -158,38 +156,22 @@ def use_gtts_for_text(text):
158
  logger.error(f"gTTS error: {e}")
159
  yield None
160
 
161
- # Enhanced WebRTC configuration
162
  rtc_configuration = {
163
  "iceServers": [
164
  {"urls": ["stun:stun.l.google.com:19302"]},
165
- {"urls": ["stun:stun1.l.google.com:19302"]},
166
  {
167
  "urls": ["turn:openrelay.metered.ca:80"],
168
  "username": "openrelayproject",
169
  "credential": "openrelayproject"
170
- },
171
- {
172
- "urls": ["turn:openrelay.metered.ca:443?transport=tcp"],
173
- "username": "openrelayproject",
174
- "credential": "openrelayproject"
175
  }
176
- ],
177
- "iceCandidatePoolSize": 10
178
  }
179
 
180
- # Build the interface - we need separate Blocks for chatbot and Stream
181
- with gr.Blocks(title="LLM Voice Assistant") as demo:
182
- gr.Markdown("# LLM Voice Chat (Powered by DeepSeek & ElevenLabs)")
183
- gr.Markdown("Click the microphone button to start speaking")
184
-
185
- # Create the main chatbot display
186
- chatbot = gr.Chatbot(label="Conversation")
187
-
188
- # Create the Stream component outside of the Blocks context to avoid conflicts
189
- # We'll insert it into the interface later
190
- stream_container = gr.HTML("<div id='stream-placeholder'>Loading WebRTC component...</div>")
191
 
192
- # Create the FastRTC Stream separately
193
  stream = Stream(
194
  modality="audio",
195
  mode="send-receive",
@@ -197,40 +179,13 @@ stream = Stream(
197
  additional_outputs_handler=lambda a, b: b,
198
  additional_inputs=[chatbot],
199
  additional_outputs=[chatbot],
200
- rtc_configuration=rtc_configuration
 
201
  )
202
 
203
- # Custom mount function
204
- def mount_components():
205
- import gradio as gr
206
- import os
207
-
208
- # Get the main interface
209
- main_interface = demo
210
-
211
- # Add the Stream interface to a custom Blocks
212
- with gr.Blocks(analytics_enabled=False) as stream_interface:
213
- stream.render()
214
-
215
- # Create a custom app that hosts both interfaces on different routes
216
- app = gr.routes.App()
217
- app.add_route("/", main_interface)
218
- app.add_route("/stream", stream_interface)
219
-
220
- # Launch the combined app
221
- app.launch()
222
 
223
- # Launch with the mount function
224
  if __name__ == "__main__":
225
- # Local development
226
- demo.launch(share=True)
227
-
228
- # Launch the Stream component separately for local development
229
- stream.ui.launch(server_port=7861, share=True)
230
- else:
231
- # For Hugging Face Spaces
232
- # Initialize FastRTC in Spaces
233
- app = gr.mount_gradio_app(stream.app, demo, path="/")
234
-
235
- # Launch both components
236
- gr.launch_app(app)
 
55
 
56
  deepseek_client = DeepSeekAPI(api_key=os.getenv("DEEPSEEK_API_KEY"))
57
 
 
58
  def response(
59
  audio: tuple[int, np.ndarray],
60
+ chatbot: list[tuple] | None = None,
61
  ):
 
62
  chatbot = chatbot or []
 
63
 
64
  # Convert speech to text
65
  text = stt_model.stt(audio)
66
  logger.info(f"User said: {text}")
67
 
68
  # Add user message to chat
69
+ chatbot.append((text, None))
70
  yield AdditionalOutputs(chatbot)
71
 
72
  # Get AI response
73
+ messages = []
74
+ for user_text, assistant_text in chatbot:
75
+ messages.append({"role": "user", "content": user_text})
76
+ if assistant_text:
77
+ messages.append({"role": "assistant", "content": assistant_text})
78
 
79
  # Call DeepSeek API
80
+ response_data = deepseek_client.chat_completion(messages)
81
  response_text = response_data["choices"][0]["message"]["content"]
82
  logger.info(f"DeepSeek response: {response_text[:50]}...")
83
 
84
+ # Update chatbot with AI response
85
+ chatbot[-1] = (text, response_text)
86
+ yield AdditionalOutputs(chatbot)
87
 
88
  # Convert response to speech
89
  if os.getenv("ELEVENLABS_API_KEY"):
 
108
  # Fall back to gTTS
109
  logger.info("ElevenLabs API key not found, using gTTS...")
110
  yield from use_gtts_for_text(response_text)
 
 
111
 
112
  def use_gtts_for_text(text):
113
  """Helper function to generate speech with gTTS for the entire text"""
 
156
  logger.error(f"gTTS error: {e}")
157
  yield None
158
 
159
+ # Basic WebRTC configuration - just the minimum needed
160
  rtc_configuration = {
161
  "iceServers": [
162
  {"urls": ["stun:stun.l.google.com:19302"]},
 
163
  {
164
  "urls": ["turn:openrelay.metered.ca:80"],
165
  "username": "openrelayproject",
166
  "credential": "openrelayproject"
 
 
 
 
 
167
  }
168
+ ]
 
169
  }
170
 
171
+ # Create chatbot component for tracking conversation
172
+ chatbot = gr.Chatbot()
 
 
 
 
 
 
 
 
 
173
 
174
+ # Create Stream outside of any blocks context
175
  stream = Stream(
176
  modality="audio",
177
  mode="send-receive",
 
179
  additional_outputs_handler=lambda a, b: b,
180
  additional_inputs=[chatbot],
181
  additional_outputs=[chatbot],
182
+ rtc_configuration=rtc_configuration,
183
+ ui_args={"title": "LLM Voice Chat (DeepSeek & ElevenLabs)"}
184
  )
185
 
186
+ # Export the UI directly
187
+ demo = stream.ui
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
+ # Expose the demo for Hugging Face Spaces
190
  if __name__ == "__main__":
191
+ demo.launch()