Twelve2five committed
Commit 2bd4006 · verified · 1 Parent(s): 1709348

Update app.py

Files changed (1):
  1. app.py (+24 / -237)
app.py CHANGED
@@ -27,14 +27,6 @@ from deepseek import DeepSeekAPI
 # Load environment variables
 load_dotenv()
 
-# Add this RTC configuration for Hugging Face Spaces
-# This is critical for WebRTC to work properly in Spaces
-rtc_config = {
-    "iceServers": [
-        {"urls": ["stun:stun.l.google.com:19302", "stun:stun1.l.google.com:19302"]}
-    ]
-}
-
 # Initialize clients
 elevenlabs_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
 stt_model = get_stt_model()
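
For reference: fastrtc's Stream accepts this kind of ICE-server dict through its rtc_configuration argument, which is forwarded to the browser's RTCPeerConnection. A minimal sketch of how the removed config was wired in; the ReplyOnPause wrapper around the app's response handler follows fastrtc's documented pattern and is an assumption here, not something visible in this diff:

from fastrtc import ReplyOnPause, Stream

# STUN-only config, as in the removed block; TURN entries can be added
# to "iceServers" if the Space blocks direct peer-to-peer traffic.
rtc_config = {
    "iceServers": [
        {"urls": ["stun:stun.l.google.com:19302"]}
    ]
}

stream = Stream(
    ReplyOnPause(response),        # `response` is the generator defined earlier in app.py
    modality="audio",
    mode="send-receive",
    rtc_configuration=rtc_config,  # forwarded to the client-side RTCPeerConnection
)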
@@ -75,155 +67,8 @@ def response(
 
     yield AdditionalOutputs(chatbot)
 
-# Custom HTML for a better-looking interface
-custom_html = """
-<div class="container">
-    <div class="voice-chat-container">
-        <div id="chat-messages" class="chat-messages">
-            <!-- Messages will appear here -->
-        </div>
-        <div class="audio-controls">
-            <button id="mic-button" class="mic-button">
-                <span class="mic-icon">🎤</span>
-            </button>
-            <div id="status-indicator" class="status-indicator">Ready</div>
-        </div>
-    </div>
-    <audio id="ai-response-audio" autoplay></audio>
-</div>
-
-<script src="file=webrtc_client.js"></script>
-<script>
-document.addEventListener('DOMContentLoaded', function() {
-    const micButton = document.getElementById('mic-button');
-    const statusIndicator = document.getElementById('status-indicator');
-    let isRecording = false;
-
-    // Initialize WebRTC
-    setupWebRTC().then(() => {
-        statusIndicator.textContent = 'Ready';
-    }).catch(error => {
-        console.error('WebRTC setup failed:', error);
-        statusIndicator.textContent = 'Microphone access failed';
-    });
-
-    // Toggle recording on button click
-    micButton.addEventListener('click', function() {
-        if (isRecording) {
-            stopRecording();
-            micButton.classList.remove('recording');
-            statusIndicator.textContent = 'Stopped';
-        } else {
-            startRecording();
-            micButton.classList.add('recording');
-            statusIndicator.textContent = 'Listening...';
-        }
-        isRecording = !isRecording;
-    });
-});
-
-// Function to update chat interface
-function updateChat(message, isUser) {
-    const chatMessages = document.getElementById('chat-messages');
-    const messageDiv = document.createElement('div');
-    messageDiv.className = isUser ? 'user-message' : 'ai-message';
-    messageDiv.textContent = message;
-    chatMessages.appendChild(messageDiv);
-    chatMessages.scrollTop = chatMessages.scrollHeight;
-}
-</script>
-
-<style>
-.container {
-    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
-    max-width: 800px;
-    margin: 0 auto;
-    padding: 20px;
-}
-
-.voice-chat-container {
-    background-color: #f8f9fa;
-    border-radius: 10px;
-    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
-    overflow: hidden;
-    display: flex;
-    flex-direction: column;
-    height: 500px;
-}
-
-.chat-messages {
-    flex: 1;
-    overflow-y: auto;
-    padding: 20px;
-}
-
-.user-message, .ai-message {
-    padding: 10px 15px;
-    border-radius: 18px;
-    margin-bottom: 10px;
-    max-width: 70%;
-    word-wrap: break-word;
-}
-
-.user-message {
-    background-color: #005fff;
-    color: white;
-    margin-left: auto;
-}
-
-.ai-message {
-    background-color: #e9e9eb;
-    color: #333;
-}
-
-.audio-controls {
-    padding: 15px;
-    display: flex;
-    align-items: center;
-    background-color: #ffffff;
-    border-top: 1px solid #e0e0e0;
-}
-
-.mic-button {
-    width: 60px;
-    height: 60px;
-    border-radius: 50%;
-    background-color: #f0f0f0;
-    border: none;
-    cursor: pointer;
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    transition: background-color 0.3s;
-}
-
-.mic-button:hover {
-    background-color: #e0e0e0;
-}
-
-.mic-button.recording {
-    background-color: #ff4b4b;
-}
-
-.mic-icon {
-    font-size: 24px;
-}
-
-.status-indicator {
-    margin-left: 15px;
-    color: #666;
-    font-size: 14px;
-}
-</style>
-"""
-
-# Create Gradio interface with custom HTML and CSS
-with gr.Blocks() as custom_interface:
-    gr.HTML(custom_html)
-    # Hidden chatbot to store conversation state
-    chatbot = gr.Chatbot(visible=False)
-
-# Create Stream with RTC configuration
+# Create Gradio interface
+chatbot = gr.Chatbot(type="messages")
+
 stream = Stream(
     modality="audio",
     mode="send-receive",
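
An aside on the new chatbot component: with type="messages", gr.Chatbot stores the transcript as OpenAI-style role/content dicts rather than the legacy list-of-pairs format, so the component's value can be handed to a chat-completions API without conversion. A small sketch of the expected value shape (the contents are illustrative):

# Value format for gr.Chatbot(type="messages")
history = [
    {"role": "user", "content": "Summarize yesterday's meeting."},
    {"role": "assistant", "content": "Sure, here are the main points..."},
]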
@@ -231,87 +76,17 @@ stream = Stream(
     additional_outputs_handler=lambda a, b: b,
     additional_inputs=[chatbot],
     additional_outputs=[chatbot],
-    ui_args={"title": "LLM Voice Chat (Powered by DeepSeek & ElevenLabs)"},
-    rtc_configuration=rtc_config
+    ui_args={"title": "LLM Voice Chat (Powered by DeepSeek & ElevenLabs)"}
 )
 
-# Create FastAPI app and mount stream and custom interface
-from fastapi import FastAPI, Request
-from fastapi.responses import HTMLResponse
-from fastapi.staticfiles import StaticFiles
-from fastapi.templating import Jinja2Templates
-from pathlib import Path
-
+# Create FastAPI app and mount stream
+from fastapi import FastAPI
 app = FastAPI()
-
-# Mount static files directory
-static_dir = Path(__file__).parent / "static"
-static_dir.mkdir(exist_ok=True)
-
-# Copy the WebRTC client JS to static directory
-webrtc_client_js = Path(__file__).parent / "fastrtc-voice-assistant" / "webrtc_client.js"
-if webrtc_client_js.exists():
-    with open(webrtc_client_js, "r") as src_file:
-        webrtc_js_content = src_file.read()
-
-    with open(static_dir / "webrtc_client.js", "w") as dest_file:
-        dest_file.write(webrtc_js_content)
-
-app.mount("/static", StaticFiles(directory=static_dir), name="static")
-
-# Mount the Stream's API endpoints
-stream.mount(app)
-
-# Serve the custom interface
-templates = Jinja2Templates(directory="templates")
-
-@app.get("/", response_class=HTMLResponse)
-async def get_root(request: Request):
-    # Create a simple template with our custom HTML
-    html_content = f"""
-    <!DOCTYPE html>
-    <html>
-    <head>
-        <title>LLM Voice Chat (Powered by DeepSeek & ElevenLabs)</title>
-        <meta charset="utf-8">
-        <meta name="viewport" content="width=device-width, initial-scale=1">
-        <script src="/static/webrtc_client.js"></script>
-    </head>
-    <body>
-        {custom_html}
-    </body>
-    </html>
-    """
-    return HTMLResponse(content=html_content)
-
-# Add WebRTC endpoints
-@app.post("/webrtc/offer")
-async def webrtc_offer(request: Request):
-    # Import the webrtc_handler
-    from fastrtc_voice_assistant.webrtc_handler import handle_offer
-
-    offer_data = await request.json()
-
-    # Define callback for audio processing
-    def audio_callback(frame):
-        # Process audio frame and feed to the STT model
-        # This is a simplified callback; you'll need to adapt this to your needs
-        pass
-
-    answer = await handle_offer(offer_data, audio_callback)
-    return answer
-
-@app.post("/webrtc/ice-candidate")
-async def webrtc_ice_candidate(request: Request):
-    from fastrtc_voice_assistant.webrtc_handler import add_ice_candidate
-
-    candidate_data = await request.json()
-    # You would need to get the appropriate PC here
-    pc = None  # This needs to be handled properly
-    await add_ice_candidate(candidate_data, pc)
-    return {"status": "ok"}
+app = gr.mount_gradio_app(app, stream.ui, path="/")
+stream.mount(app)  # Mount the stream for telephone/fastphone integration
 
 # Update the chat completion part based on available methods:
+# We'll use direct HTTP requests as a fallback since the API structure is unclear:
 def get_deepseek_response(messages):
     url = "https://api.deepseek.com/v1/chat/completions"
     headers = {
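
get_deepseek_response builds a plain chat-completions request against DeepSeek's OpenAI-compatible endpoint. A self-contained sketch of the shape of that call; the function name here is hypothetical, and the model id "deepseek-chat" and the timeout are assumptions, not taken from this diff:

import os
import requests

def deepseek_chat(messages):
    # Direct HTTP fallback: POST to the chat-completions endpoint.
    resp = requests.post(
        "https://api.deepseek.com/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {os.getenv('DEEPSEEK_API_KEY')}",
            "Content-Type": "application/json",
        },
        json={
            "model": "deepseek-chat",  # assumed model id
            "messages": messages,      # [{"role": ..., "content": ...}, ...]
        },
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]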
@@ -458,13 +233,25 @@ def text_to_speech(text):
     print(f"Exception in text_to_speech: {e}")
     yield None
 
+# Add this debug statement AFTER the function definition
+print("text_to_speech function:", inspect.getsource(text_to_speech))
+
 if __name__ == "__main__":
+    # HF Spaces configuration
     os.environ["GRADIO_SSR_MODE"] = "false"
 
-    # Check FastRTC version
+    # Check if running on Hugging Face Spaces
+    HF_SPACE = os.getenv("HF_SPACE", False)
+    PORT = int(os.getenv("PORT", 7860))
+
+    # Remove debug code in production
     import fastrtc
     print(f"FastRTC version: {fastrtc.__version__ if hasattr(fastrtc, '__version__') else 'unknown'}")
 
-    # Use a simpler startup method compatible with Hugging Face Spaces
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    # Launch with web interface (appropriate for Hugging Face Spaces)
+    stream.ui.launch(
+        server_name="0.0.0.0",  # Bind to all interfaces
+        server_port=PORT,
+        share=False,  # No need for sharing on HF Spaces
+        debug=False   # Disable debug in production
+    )
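
Note that __main__ now launches stream.ui directly, so the FastAPI app assembled above (with gr.mount_gradio_app and stream.mount) is only exercised if the file is served through an ASGI server instead; the added debug print also assumes inspect is imported earlier in app.py. A sketch of that alternative entrypoint, mirroring the uvicorn call this commit removed (app and stream are the objects built above):

import os
import uvicorn

if __name__ == "__main__":
    # Serve the FastAPI app, which has the Gradio UI mounted at "/"
    # and fastrtc's endpoints mounted via stream.mount(app).
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))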