Twelve2five committed
Commit 1709348 · verified · 1 Parent(s): dcc612f

Update app.py

Files changed (1)
  1. app.py +222 -26
app.py CHANGED
@@ -75,25 +75,155 @@ def response(
 
     yield AdditionalOutputs(chatbot)
 
-# Create Gradio interface
-chatbot = gr.Chatbot(type="messages", height=500, label="Conversation")
-
-# Define enhanced UI arguments
-enhanced_ui_args = {
-    "title": "LLM Voice Chat (Powered by DeepSeek & ElevenLabs)",
-    "description": "Speak after clicking the microphone button below. Your conversation will appear in the chat.",
-    "theme": gr.themes.Soft(),
-    "css": """
-        .gradio-container {
-            min-height: 600px;
-        }
-        #chatbot {
-            min-height: 400px;
-        }
-    """
+# Custom HTML for a better-looking interface
+custom_html = """
+<div class="container">
+    <div class="voice-chat-container">
+        <div id="chat-messages" class="chat-messages">
+            <!-- Messages will appear here -->
+        </div>
+        <div class="audio-controls">
+            <button id="mic-button" class="mic-button">
+                <span class="mic-icon">🎤</span>
+            </button>
+            <div id="status-indicator" class="status-indicator">Ready</div>
+        </div>
+    </div>
+    <audio id="ai-response-audio" autoplay></audio>
+</div>
+
+<script src="file=webrtc_client.js"></script>
+<script>
+document.addEventListener('DOMContentLoaded', function() {
+    const micButton = document.getElementById('mic-button');
+    const statusIndicator = document.getElementById('status-indicator');
+    let isRecording = false;
+
+    // Initialize WebRTC
+    setupWebRTC().then(() => {
+        statusIndicator.textContent = 'Ready';
+    }).catch(error => {
+        console.error('WebRTC setup failed:', error);
+        statusIndicator.textContent = 'Microphone access failed';
+    });
+
+    // Toggle recording on button click
+    micButton.addEventListener('click', function() {
+        if (isRecording) {
+            stopRecording();
+            micButton.classList.remove('recording');
+            statusIndicator.textContent = 'Stopped';
+        } else {
+            startRecording();
+            micButton.classList.add('recording');
+            statusIndicator.textContent = 'Listening...';
+        }
+        isRecording = !isRecording;
+    });
+});
+
+// Function to update chat interface
+function updateChat(message, isUser) {
+    const chatMessages = document.getElementById('chat-messages');
+    const messageDiv = document.createElement('div');
+    messageDiv.className = isUser ? 'user-message' : 'ai-message';
+    messageDiv.textContent = message;
+    chatMessages.appendChild(messageDiv);
+    chatMessages.scrollTop = chatMessages.scrollHeight;
+}
+</script>
+
+<style>
+.container {
+    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
+    max-width: 800px;
+    margin: 0 auto;
+    padding: 20px;
+}
+
+.voice-chat-container {
+    background-color: #f8f9fa;
+    border-radius: 10px;
+    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
+    overflow: hidden;
+    display: flex;
+    flex-direction: column;
+    height: 500px;
+}
+
+.chat-messages {
+    flex: 1;
+    overflow-y: auto;
+    padding: 20px;
+}
+
+.user-message, .ai-message {
+    padding: 10px 15px;
+    border-radius: 18px;
+    margin-bottom: 10px;
+    max-width: 70%;
+    word-wrap: break-word;
+}
+
+.user-message {
+    background-color: #005fff;
+    color: white;
+    margin-left: auto;
+}
+
+.ai-message {
+    background-color: #e9e9eb;
+    color: #333;
+}
+
+.audio-controls {
+    padding: 15px;
+    display: flex;
+    align-items: center;
+    background-color: #ffffff;
+    border-top: 1px solid #e0e0e0;
+}
+
+.mic-button {
+    width: 60px;
+    height: 60px;
+    border-radius: 50%;
+    background-color: #f0f0f0;
+    border: none;
+    cursor: pointer;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    transition: background-color 0.3s;
+}
+
+.mic-button:hover {
+    background-color: #e0e0e0;
 }
 
-# Create Stream with enhanced UI args and the RTC configuration
+.mic-button.recording {
+    background-color: #ff4b4b;
+}
+
+.mic-icon {
+    font-size: 24px;
+}
+
+.status-indicator {
+    margin-left: 15px;
+    color: #666;
+    font-size: 14px;
+}
+</style>
+"""
+
+# Create Gradio interface with custom HTML and CSS
+with gr.Blocks() as custom_interface:
+    gr.HTML(custom_html)
+    # Hidden chatbot to store conversation state
+    chatbot = gr.Chatbot(visible=False)
+
+# Create Stream with RTC configuration
 stream = Stream(
     modality="audio",
     mode="send-receive",
@@ -101,18 +231,87 @@ stream = Stream(
     additional_outputs_handler=lambda a, b: b,
     additional_inputs=[chatbot],
     additional_outputs=[chatbot],
-    ui_args=enhanced_ui_args,
+    ui_args={"title": "LLM Voice Chat (Powered by DeepSeek & ElevenLabs)"},
     rtc_configuration=rtc_config
 )
 
-# Create FastAPI app and mount stream
-from fastapi import FastAPI
+# Create FastAPI app and mount stream and custom interface
+from fastapi import FastAPI, Request
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from pathlib import Path
+
 app = FastAPI()
-app = gr.mount_gradio_app(app, stream.ui, path="/")
-stream.mount(app)  # Mount the stream for telephone/fastphone integration
+
+# Mount static files directory
+static_dir = Path(__file__).parent / "static"
+static_dir.mkdir(exist_ok=True)
+
+# Copy the WebRTC client JS to static directory
+webrtc_client_js = Path(__file__).parent / "fastrtc-voice-assistant" / "webrtc_client.js"
+if webrtc_client_js.exists():
+    with open(webrtc_client_js, "r") as src_file:
+        webrtc_js_content = src_file.read()
+
+    with open(static_dir / "webrtc_client.js", "w") as dest_file:
+        dest_file.write(webrtc_js_content)
+
+app.mount("/static", StaticFiles(directory=static_dir), name="static")
+
+# Mount the Stream's API endpoints
+stream.mount(app)
+
+# Serve the custom interface
+templates = Jinja2Templates(directory="templates")
+
+@app.get("/", response_class=HTMLResponse)
+async def get_root(request: Request):
+    # Create a simple template with our custom HTML
+    html_content = f"""
+    <!DOCTYPE html>
+    <html>
+    <head>
+        <title>LLM Voice Chat (Powered by DeepSeek & ElevenLabs)</title>
+        <meta charset="utf-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1">
+        <script src="/static/webrtc_client.js"></script>
+    </head>
+    <body>
+        {custom_html}
+    </body>
+    </html>
+    """
+    return HTMLResponse(content=html_content)
+
+# Add WebRTC endpoints
+@app.post("/webrtc/offer")
+async def webrtc_offer(request: Request):
+    # Import the webrtc_handler
+    from fastrtc_voice_assistant.webrtc_handler import handle_offer
+
+    offer_data = await request.json()
+
+    # Define callback for audio processing
+    def audio_callback(frame):
+        # Process audio frame and feed to the STT model
+        # This is a simplified callback; you'll need to adapt this to your needs
+        pass
+
+    answer = await handle_offer(offer_data, audio_callback)
+    return answer
+
+@app.post("/webrtc/ice-candidate")
+async def webrtc_ice_candidate(request: Request):
+    from fastrtc_voice_assistant.webrtc_handler import add_ice_candidate
+
+    candidate_data = await request.json()
+    # You would need to get the appropriate PC here
+    pc = None  # This needs to be handled properly
+    await add_ice_candidate(candidate_data, pc)
+    return {"status": "ok"}
 
 # Update the chat completion part based on available methods:
-# We'll use direct HTTP requests as a fallback since the API structure is unclear:
 def get_deepseek_response(messages):
     url = "https://api.deepseek.com/v1/chat/completions"
     headers = {
@@ -259,9 +458,6 @@ def text_to_speech(text):
         print(f"Exception in text_to_speech: {e}")
         yield None
 
-# Add this debug statement AFTER the function definition
-print("text_to_speech function:", inspect.getsource(text_to_speech))
-
 if __name__ == "__main__":
     os.environ["GRADIO_SSR_MODE"] = "false"
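Note: the `audio_callback` added inside `/webrtc/offer` is left as a `pass` stub. A minimal sketch of what it might do, assuming aiortc/PyAV-style `AudioFrame` objects and a hypothetical `stt_model` hand-off (neither appears in this commit):

```python
import numpy as np

def audio_callback(frame):
    # Assumes an aiortc/PyAV AudioFrame: to_ndarray() yields int16 PCM,
    # which most STT models expect as float32 samples in [-1.0, 1.0].
    samples = frame.to_ndarray().flatten().astype(np.float32) / 32768.0
    # Hypothetical hand-off to the speech-to-text model:
    # stt_model.process(frame.sample_rate, samples)
```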
 
 
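Likewise, `/webrtc/ice-candidate` leaves `pc = None` unresolved. A conventional pattern is a registry of live peer connections keyed by a client-generated session id; the `peer_connections` dict and `session_id` field below are illustrative assumptions, and `handle_offer` would need to register each new connection in the same registry:

```python
# Illustrative registry of live peer connections, keyed by a
# client-generated session id (not part of this commit).
peer_connections = {}

@app.post("/webrtc/ice-candidate")
async def webrtc_ice_candidate(request: Request):
    from fastrtc_voice_assistant.webrtc_handler import add_ice_candidate

    candidate_data = await request.json()
    # Assumed: the client sends the same "session_id" it used in its offer.
    pc = peer_connections.get(candidate_data.get("session_id"))
    if pc is None:
        return {"status": "error", "detail": "unknown session"}
    await add_ice_candidate(candidate_data, pc)
    return {"status": "ok"}
```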
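Finally, the diff truncates `get_deepseek_response` right after its `headers` dict. Given the removed comment about using direct HTTP requests as a fallback, the body presumably resembles a standard OpenAI-compatible chat call; a sketch, where the `deepseek-chat` model name and the `DEEPSEEK_API_KEY` environment variable are assumptions:

```python
import os
import requests

def get_deepseek_response(messages):
    # Direct HTTP fallback: POST an OpenAI-style chat payload to DeepSeek.
    url = "https://api.deepseek.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        # Assumed: the API key is read from an environment variable.
        "Authorization": f"Bearer {os.environ['DEEPSEEK_API_KEY']}",
    }
    payload = {
        "model": "deepseek-chat",  # assumed model name
        "messages": messages,      # [{"role": ..., "content": ...}, ...]
    }
    resp = requests.post(url, headers=headers, json=payload, timeout=60)
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]
```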