Twelve2five committed
Commit 1709348 · verified · 1 Parent(s): dcc612f

Update app.py

Files changed (1)
  1. app.py +222 -26
app.py CHANGED
@@ -75,25 +75,155 @@ def response(
 
     yield AdditionalOutputs(chatbot)
 
-# Create Gradio interface
-chatbot = gr.Chatbot(type="messages", height=500, label="Conversation")
-
-# Define enhanced UI arguments
-enhanced_ui_args = {
-    "title": "LLM Voice Chat (Powered by DeepSeek & ElevenLabs)",
-    "description": "Speak after clicking the microphone button below. Your conversation will appear in the chat.",
-    "theme": gr.themes.Soft(),
-    "css": """
-        .gradio-container {
-            min-height: 600px;
-        }
-        #chatbot {
-            min-height: 400px;
-        }
-    """
+# Custom HTML for a better-looking interface
+custom_html = """
+<div class="container">
+    <div class="voice-chat-container">
+        <div id="chat-messages" class="chat-messages">
+            <!-- Messages will appear here -->
+        </div>
+        <div class="audio-controls">
+            <button id="mic-button" class="mic-button">
+                <span class="mic-icon">🎤</span>
+            </button>
+            <div id="status-indicator" class="status-indicator">Ready</div>
+        </div>
+    </div>
+    <audio id="ai-response-audio" autoplay></audio>
+</div>
+
+<script src="file=webrtc_client.js"></script>
+<script>
+document.addEventListener('DOMContentLoaded', function() {
+    const micButton = document.getElementById('mic-button');
+    const statusIndicator = document.getElementById('status-indicator');
+    let isRecording = false;
+
+    // Initialize WebRTC
+    setupWebRTC().then(() => {
+        statusIndicator.textContent = 'Ready';
+    }).catch(error => {
+        console.error('WebRTC setup failed:', error);
+        statusIndicator.textContent = 'Microphone access failed';
+    });
+
+    // Toggle recording on button click
+    micButton.addEventListener('click', function() {
+        if (isRecording) {
+            stopRecording();
+            micButton.classList.remove('recording');
+            statusIndicator.textContent = 'Stopped';
+        } else {
+            startRecording();
+            micButton.classList.add('recording');
+            statusIndicator.textContent = 'Listening...';
+        }
+        isRecording = !isRecording;
+    });
+});
+
+// Function to update chat interface
+function updateChat(message, isUser) {
+    const chatMessages = document.getElementById('chat-messages');
+    const messageDiv = document.createElement('div');
+    messageDiv.className = isUser ? 'user-message' : 'ai-message';
+    messageDiv.textContent = message;
+    chatMessages.appendChild(messageDiv);
+    chatMessages.scrollTop = chatMessages.scrollHeight;
+}
+</script>
+
+<style>
+.container {
+    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
+    max-width: 800px;
+    margin: 0 auto;
+    padding: 20px;
+}
+
+.voice-chat-container {
+    background-color: #f8f9fa;
+    border-radius: 10px;
+    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
+    overflow: hidden;
+    display: flex;
+    flex-direction: column;
+    height: 500px;
+}
+
+.chat-messages {
+    flex: 1;
+    overflow-y: auto;
+    padding: 20px;
+}
+
+.user-message, .ai-message {
+    padding: 10px 15px;
+    border-radius: 18px;
+    margin-bottom: 10px;
+    max-width: 70%;
+    word-wrap: break-word;
+}
+
+.user-message {
+    background-color: #005fff;
+    color: white;
+    margin-left: auto;
+}
+
+.ai-message {
+    background-color: #e9e9eb;
+    color: #333;
+}
+
+.audio-controls {
+    padding: 15px;
+    display: flex;
+    align-items: center;
+    background-color: #ffffff;
+    border-top: 1px solid #e0e0e0;
+}
+
+.mic-button {
+    width: 60px;
+    height: 60px;
+    border-radius: 50%;
+    background-color: #f0f0f0;
+    border: none;
+    cursor: pointer;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    transition: background-color 0.3s;
+}
+
+.mic-button:hover {
+    background-color: #e0e0e0;
 }
 
-# Create Stream with enhanced UI args and the RTC configuration
+.mic-button.recording {
+    background-color: #ff4b4b;
+}
+
+.mic-icon {
+    font-size: 24px;
+}
+
+.status-indicator {
+    margin-left: 15px;
+    color: #666;
+    font-size: 14px;
+}
+</style>
+"""
+
+# Create Gradio interface with custom HTML and CSS
+with gr.Blocks() as custom_interface:
+    gr.HTML(custom_html)
+    # Hidden chatbot to store conversation state
+    chatbot = gr.Chatbot(visible=False)
+
+# Create Stream with RTC configuration
 stream = Stream(
     modality="audio",
     mode="send-receive",
@@ -101,18 +231,87 @@ stream = Stream(
     additional_outputs_handler=lambda a, b: b,
     additional_inputs=[chatbot],
     additional_outputs=[chatbot],
-    ui_args=enhanced_ui_args,
+    ui_args={"title": "LLM Voice Chat (Powered by DeepSeek & ElevenLabs)"},
     rtc_configuration=rtc_config
 )
 
-# Create FastAPI app and mount stream
-from fastapi import FastAPI
+# Create FastAPI app and mount stream and custom interface
+from fastapi import FastAPI, Request
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from pathlib import Path
+
 app = FastAPI()
-app = gr.mount_gradio_app(app, stream.ui, path="/")
-stream.mount(app)  # Mount the stream for telephone/fastphone integration
+
+# Mount static files directory
+static_dir = Path(__file__).parent / "static"
+static_dir.mkdir(exist_ok=True)
+
+# Copy the WebRTC client JS to static directory
+webrtc_client_js = Path(__file__).parent / "fastrtc-voice-assistant" / "webrtc_client.js"
+if webrtc_client_js.exists():
+    with open(webrtc_client_js, "r") as src_file:
+        webrtc_js_content = src_file.read()
+
+    with open(static_dir / "webrtc_client.js", "w") as dest_file:
+        dest_file.write(webrtc_js_content)
+
+app.mount("/static", StaticFiles(directory=static_dir), name="static")
+
+# Mount the Stream's API endpoints
+stream.mount(app)
+
+# Serve the custom interface
+templates = Jinja2Templates(directory="templates")
+
+@app.get("/", response_class=HTMLResponse)
+async def get_root(request: Request):
+    # Create a simple template with our custom HTML
+    html_content = f"""
+    <!DOCTYPE html>
+    <html>
+    <head>
+        <title>LLM Voice Chat (Powered by DeepSeek & ElevenLabs)</title>
+        <meta charset="utf-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1">
+        <script src="/static/webrtc_client.js"></script>
+    </head>
+    <body>
+        {custom_html}
+    </body>
+    </html>
+    """
+    return HTMLResponse(content=html_content)
+
+# Add WebRTC endpoints
+@app.post("/webrtc/offer")
+async def webrtc_offer(request: Request):
+    # Import the webrtc_handler
+    from fastrtc_voice_assistant.webrtc_handler import handle_offer
+
+    offer_data = await request.json()
+
+    # Define callback for audio processing
+    def audio_callback(frame):
+        # Process audio frame and feed to the STT model
+        # This is a simplified callback; you'll need to adapt this to your needs
+        pass
+
+    answer = await handle_offer(offer_data, audio_callback)
+    return answer
+
+@app.post("/webrtc/ice-candidate")
+async def webrtc_ice_candidate(request: Request):
+    from fastrtc_voice_assistant.webrtc_handler import add_ice_candidate
+
+    candidate_data = await request.json()
+    # You would need to get the appropriate PC here
+    pc = None  # This needs to be handled properly
+    await add_ice_candidate(candidate_data, pc)
+    return {"status": "ok"}
 
 # Update the chat completion part based on available methods:
-# We'll use direct HTTP requests as a fallback since the API structure is unclear:
 def get_deepseek_response(messages):
     url = "https://api.deepseek.com/v1/chat/completions"
     headers = {
@@ -259,9 +458,6 @@ def text_to_speech(text):
         print(f"Exception in text_to_speech: {e}")
         yield None
 
-# Add this debug statement AFTER the function definition
-print("text_to_speech function:", inspect.getsource(text_to_speech))
-
 if __name__ == "__main__":
     os.environ["GRADIO_SSR_MODE"] = "false"
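Note: the `audio_callback` added inside `/webrtc/offer` is left as a `pass` stub. A minimal sketch of what it might do, assuming aiortc/PyAV-style `AudioFrame` objects and a hypothetical `stt_model` hand-off (neither appears in this commit):

```python
import numpy as np

def audio_callback(frame):
    # Assumes an aiortc/PyAV AudioFrame: to_ndarray() yields int16 PCM,
    # which most STT models expect as float32 samples in [-1.0, 1.0].
    samples = frame.to_ndarray().flatten().astype(np.float32) / 32768.0
    # Hypothetical hand-off to the speech-to-text model:
    # stt_model.process(frame.sample_rate, samples)
```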
 
 
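Likewise, `/webrtc/ice-candidate` leaves `pc = None` unresolved. A conventional pattern is a registry of live peer connections keyed by a client-generated session id; the `peer_connections` dict and `session_id` field below are illustrative assumptions, and `handle_offer` would need to register each new connection in the same registry:

```python
# Illustrative registry of live peer connections, keyed by a
# client-generated session id (not part of this commit).
peer_connections = {}

@app.post("/webrtc/ice-candidate")
async def webrtc_ice_candidate(request: Request):
    from fastrtc_voice_assistant.webrtc_handler import add_ice_candidate

    candidate_data = await request.json()
    # Assumed: the client sends the same "session_id" it used in its offer.
    pc = peer_connections.get(candidate_data.get("session_id"))
    if pc is None:
        return {"status": "error", "detail": "unknown session"}
    await add_ice_candidate(candidate_data, pc)
    return {"status": "ok"}
```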
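Finally, the diff truncates `get_deepseek_response` right after its `headers` dict. Given the removed comment about using direct HTTP requests as a fallback, the body presumably resembles a standard OpenAI-compatible chat call; a sketch, where the `deepseek-chat` model name and the `DEEPSEEK_API_KEY` environment variable are assumptions:

```python
import os
import requests

def get_deepseek_response(messages):
    # Direct HTTP fallback: POST an OpenAI-style chat payload to DeepSeek.
    url = "https://api.deepseek.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        # Assumed: the API key is read from an environment variable.
        "Authorization": f"Bearer {os.environ['DEEPSEEK_API_KEY']}",
    }
    payload = {
        "model": "deepseek-chat",  # assumed model name
        "messages": messages,      # [{"role": ..., "content": ...}, ...]
    }
    resp = requests.post(url, headers=headers, json=payload, timeout=60)
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]
```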