Twelve2five committed on
Commit
f558bc0
·
verified ·
1 Parent(s): 227326d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -30
app.py CHANGED
@@ -17,13 +17,23 @@ import io
17
  import soundfile as sf
18
  from gtts import gTTS
19
  import re
 
 
 
 
 
20
 
21
  # Load environment variables
22
  load_dotenv()
23
 
 
 
 
24
  # Initialize clients
 
25
  elevenlabs_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
26
  stt_model = get_stt_model()
 
27
 
28
  class DeepSeekAPI:
29
  def __init__(self, api_key):
@@ -45,7 +55,7 @@ class DeepSeekAPI:
45
 
46
  # Check for error response
47
  if response.status_code != 200:
48
- print(f"DeepSeek API error: {response.status_code} - {response.text}")
49
  return {"choices": [{"message": {"content": "I'm sorry, I encountered an error processing your request."}}]}
50
 
51
  return response.json()
@@ -60,8 +70,9 @@ def response(
60
  messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
61
 
62
  # Convert speech to text
 
63
  text = stt_model.stt(audio)
64
- print("prompt:", text)
65
 
66
  # Add user message to chat
67
  chatbot.append({"role": "user", "content": text})
@@ -71,8 +82,10 @@ def response(
71
  messages.append({"role": "user", "content": text})
72
 
73
  # Call DeepSeek API
 
74
  response_data = deepseek_client.chat_completion(messages)
75
  response_text = response_data["choices"][0]["message"]["content"]
 
76
 
77
  # Add AI response to chat
78
  chatbot.append({"role": "assistant", "content": response_text})
@@ -80,7 +93,7 @@ def response(
80
  # Convert response to speech
81
  if os.getenv("ELEVENLABS_API_KEY"):
82
  try:
83
- print(f"Generating ElevenLabs speech for response")
84
 
85
  # Use the streaming API for better experience
86
  for chunk in elevenlabs_client.text_to_speech.convert_as_stream(
@@ -93,12 +106,12 @@ def response(
93
  yield (24000, audio_array)
94
 
95
  except Exception as e:
96
- print(f"ElevenLabs error: {e}, falling back to gTTS")
97
  # Fall back to gTTS
98
  yield from use_gtts_for_text(response_text)
99
  else:
100
  # Fall back to gTTS
101
- print("ElevenLabs API key not found, using gTTS...")
102
  yield from use_gtts_for_text(response_text)
103
 
104
  yield AdditionalOutputs(chatbot)
@@ -114,7 +127,7 @@ def use_gtts_for_text(text):
114
  continue
115
 
116
  mp3_fp = io.BytesIO()
117
- print(f"Using gTTS for sentence: {sentence[:30]}...")
118
  tts = gTTS(text=sentence, lang='en-us', tld='com', slow=False)
119
  tts.write_to_fp(mp3_fp)
120
  mp3_fp.seek(0)
@@ -147,48 +160,103 @@ def use_gtts_for_text(text):
147
  chunk = chunk.reshape(1, -1)
148
  yield (24000, chunk)
149
  except Exception as e:
150
- print(f"gTTS error: {e}")
151
  yield None
152
 
153
- # Enhanced WebRTC configuration
154
  rtc_configuration = {
155
  "iceServers": [
156
- {"urls": ["stun:stun.l.google.com:19302", "stun:stun1.l.google.com:19302"]},
 
 
 
 
 
 
 
157
  {
158
  "urls": ["turn:openrelay.metered.ca:80"],
159
  "username": "openrelayproject",
160
  "credential": "openrelayproject"
161
  },
 
 
 
 
 
162
  {
163
  "urls": ["turn:openrelay.metered.ca:443?transport=tcp"],
164
  "username": "openrelayproject",
165
  "credential": "openrelayproject"
166
- }
 
 
 
 
 
167
  ],
168
- "iceCandidatePoolSize": 10
 
 
 
169
  }
170
 
171
- # Create the Stream component outside of any Blocks context
172
- chatbot = gr.Chatbot(type="messages", visible=False) # Will be used for state only
173
-
174
- stream = Stream(
175
- modality="audio",
176
- mode="send-receive",
177
- handler=ReplyOnPause(response, input_sample_rate=16000),
178
- additional_outputs_handler=lambda a, b: b,
179
- additional_inputs=[chatbot],
180
- additional_outputs=[chatbot],
181
- rtc_configuration=rtc_configuration,
182
- concurrency_limit=5 if get_space() else None,
183
- time_limit=90 if get_space() else None,
184
- ui_args={"title": "LLM Voice Chat (Powered by DeepSeek & ElevenLabs)"}
185
- )
186
-
187
- # Create a basic Gradio interface
188
- demo = stream.ui
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
  # Launch the app
191
  if __name__ == "__main__":
192
  # Local development
 
193
  os.environ["GRADIO_SSR_MODE"] = "false"
194
- demo.launch(server_port=7860)
 
 
 
 
 
17
  import soundfile as sf
18
  from gtts import gTTS
19
  import re
20
+ import logging
21
+
22
+ # Set up logging for WebRTC debugging
23
+ logging.basicConfig(level=logging.DEBUG)
24
+ logger = logging.getLogger("fastrtc-voice-assistant")
25
 
26
  # Load environment variables
27
  load_dotenv()
28
 
29
+ # Enable WebRTC debug tracing
30
+ os.environ["WEBRTC_TRACE"] = "WEBRTC_TRACE_ALL"
31
+
32
  # Initialize clients
33
+ logger.info("Initializing clients...")
34
  elevenlabs_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
35
  stt_model = get_stt_model()
36
+ logger.info("Clients initialized")
37
 
38
  class DeepSeekAPI:
39
  def __init__(self, api_key):
 
55
 
56
  # Check for error response
57
  if response.status_code != 200:
58
+ logger.error(f"DeepSeek API error: {response.status_code} - {response.text}")
59
  return {"choices": [{"message": {"content": "I'm sorry, I encountered an error processing your request."}}]}
60
 
61
  return response.json()
 
70
  messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
71
 
72
  # Convert speech to text
73
+ logger.info("Converting speech to text...")
74
  text = stt_model.stt(audio)
75
+ logger.info(f"User said: {text}")
76
 
77
  # Add user message to chat
78
  chatbot.append({"role": "user", "content": text})
 
82
  messages.append({"role": "user", "content": text})
83
 
84
  # Call DeepSeek API
85
+ logger.info("Calling DeepSeek API...")
86
  response_data = deepseek_client.chat_completion(messages)
87
  response_text = response_data["choices"][0]["message"]["content"]
88
+ logger.info(f"DeepSeek response: {response_text[:50]}...")
89
 
90
  # Add AI response to chat
91
  chatbot.append({"role": "assistant", "content": response_text})
 
93
  # Convert response to speech
94
  if os.getenv("ELEVENLABS_API_KEY"):
95
  try:
96
+ logger.info("Using ElevenLabs for speech generation")
97
 
98
  # Use the streaming API for better experience
99
  for chunk in elevenlabs_client.text_to_speech.convert_as_stream(
 
106
  yield (24000, audio_array)
107
 
108
  except Exception as e:
109
+ logger.error(f"ElevenLabs error: {e}, falling back to gTTS")
110
  # Fall back to gTTS
111
  yield from use_gtts_for_text(response_text)
112
  else:
113
  # Fall back to gTTS
114
+ logger.info("ElevenLabs API key not found, using gTTS...")
115
  yield from use_gtts_for_text(response_text)
116
 
117
  yield AdditionalOutputs(chatbot)
 
127
  continue
128
 
129
  mp3_fp = io.BytesIO()
130
+ logger.info(f"Using gTTS for: {sentence[:30]}...")
131
  tts = gTTS(text=sentence, lang='en-us', tld='com', slow=False)
132
  tts.write_to_fp(mp3_fp)
133
  mp3_fp.seek(0)
 
160
  chunk = chunk.reshape(1, -1)
161
  yield (24000, chunk)
162
  except Exception as e:
163
+ logger.error(f"gTTS error: {e}")
164
  yield None
165
 
166
# WebRTC peer-connection configuration: public STUN servers for NAT
# discovery, plus OpenRelay TURN relays as a fallback media path when a
# direct connection cannot be established.
# NOTE(review): stun.stunprotocol.org has reportedly been decommissioned —
# verify it still resolves before relying on it.
_GOOGLE_STUN_URLS = [f"stun:stun{n}.l.google.com:19302" for n in ("", "1", "2", "3", "4")]
_EXTRA_STUN_URLS = [
    "stun:stun.stunprotocol.org:3478",
    "stun:stun.voip.blackberry.com:3478",
    "stun:stun.nextcloud.com:443",
]
_TURN_URLS = [
    "turn:openrelay.metered.ca:80",
    "turn:openrelay.metered.ca:443",
    "turn:openrelay.metered.ca:443?transport=tcp",
]
_TURN_CREDS = {"username": "openrelayproject", "credential": "openrelayproject"}

rtc_configuration = {
    "iceServers": (
        [{"urls": [url]} for url in _GOOGLE_STUN_URLS]
        + [{"urls": [url], **_TURN_CREDS} for url in _TURN_URLS]
        + [{"urls": [url]} for url in _EXTRA_STUN_URLS]
    ),
    "iceCandidatePoolSize": 10,
    "bundlePolicy": "max-bundle",
    "rtcpMuxPolicy": "require",
    "iceTransportPolicy": "all",  # Try "relay" if "all" doesn't work
}
203
 
204
# Build the Gradio UI: chat transcript plus debug widgets, with the
# FastRTC audio stream rendered inside the same Blocks context.
with gr.Blocks(title="LLM Voice Chat (Powered by DeepSeek & ElevenLabs)") as demo:
    gr.Markdown("# LLM Voice Chat\nPowered by DeepSeek & ElevenLabs")

    with gr.Row():
        with gr.Column(scale=3):
            # Conversation transcript; also passed to/from the audio
            # handler as additional input/output state.
            chatbot = gr.Chatbot(type="messages")

            # Read-only widgets so the user can see connection state.
            connection_status = gr.Textbox(
                label="Connection Status",
                value="Ready to connect. Click the microphone button to start.",
                interactive=False,
            )
            debug_info = gr.Textbox(
                label="Debug Info",
                value="WebRTC debug information will appear here.",
                interactive=False,
            )

            # Manual "kick" for a stuck WebRTC connection.
            refresh_btn = gr.Button("Refresh Connection")

            def refresh_page():
                """Report a refresh attempt with a fresh timestamp.

                Returns the new (connection_status, debug_info) values;
                Gradio applies them through the `outputs` mapping on the
                click event below. (The previous version also assigned
                `debug_info.value` directly, which has no effect
                server-side in Gradio and has been removed.)
                """
                # `time` is not imported at the top of this file (the
                # update only added `import logging`), so import it here
                # to avoid a NameError on click.
                import time

                now = time.time()
                return "Refreshed", f"Connection refresh attempted at {now}"

            refresh_btn.click(
                refresh_page,
                outputs=[connection_status, debug_info],
            )

    logger.info("Creating Stream component...")
    # The Stream is created and rendered INSIDE the Blocks context so its
    # widgets mount into this page (the previous comment claiming it was
    # "outside of the blocks context" contradicted the code).
    stream = Stream(
        modality="audio",
        mode="send-receive",
        handler=ReplyOnPause(response, input_sample_rate=16000),
        additional_outputs_handler=lambda a, b: b,
        additional_inputs=[chatbot],
        additional_outputs=[chatbot],
        rtc_configuration=rtc_configuration,
        # Resource limits only apply on shared Hugging Face hardware.
        concurrency_limit=5 if get_space() else None,
        time_limit=90 if get_space() else None,
    )

    # Mount the stream's widgets into the Blocks layout.
    stream.render()
    logger.info("Stream component created and rendered")

# Launch the app
if __name__ == "__main__":
    # Local development. NOTE(review): share=True opens a public Gradio
    # tunnel in addition to the local port — confirm this is intended.
    logger.info("Running in development mode")
    os.environ["GRADIO_SSR_MODE"] = "false"
    demo.launch(server_port=7860, share=True)
else:
    # Module was imported (e.g. by a Hugging Face Spaces runner) rather
    # than executed directly.
    logger.info("Running in Hugging Face Spaces")
    demo.launch()