Twelve2five committed
Commit 2bd4006 · verified · 1 Parent(s): 1709348

Update app.py

Files changed (1):
  1. app.py (+24 / -237)
app.py CHANGED
@@ -27,14 +27,6 @@ from deepseek import DeepSeekAPI
 # Load environment variables
 load_dotenv()
 
-# Add this RTC configuration for Hugging Face Spaces
-# This is critical for WebRTC to work properly in Spaces
-rtc_config = {
-    "iceServers": [
-        {"urls": ["stun:stun.l.google.com:19302", "stun:stun1.l.google.com:19302"]}
-    ]
-}
-
 # Initialize clients
 elevenlabs_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
 stt_model = get_stt_model()
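
For reference: fastrtc's Stream accepts this kind of ICE-server dict through its rtc_configuration argument, which is forwarded to the browser's RTCPeerConnection. A minimal sketch of how the removed config was wired in; the ReplyOnPause wrapper around the app's response handler follows fastrtc's documented pattern and is an assumption here, not something visible in this diff:

from fastrtc import ReplyOnPause, Stream

# STUN-only config, as in the removed block; TURN entries can be added
# to "iceServers" if the Space blocks direct peer-to-peer traffic.
rtc_config = {
    "iceServers": [
        {"urls": ["stun:stun.l.google.com:19302"]}
    ]
}

stream = Stream(
    ReplyOnPause(response),        # `response` is the generator defined earlier in app.py
    modality="audio",
    mode="send-receive",
    rtc_configuration=rtc_config,  # forwarded to the client-side RTCPeerConnection
)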
@@ -75,155 +67,8 @@ def response(
 
     yield AdditionalOutputs(chatbot)
 
-# Custom HTML for a better-looking interface
-custom_html = """
-<div class="container">
-    <div class="voice-chat-container">
-        <div id="chat-messages" class="chat-messages">
-            <!-- Messages will appear here -->
-        </div>
-        <div class="audio-controls">
-            <button id="mic-button" class="mic-button">
-                <span class="mic-icon">🎤</span>
-            </button>
-            <div id="status-indicator" class="status-indicator">Ready</div>
-        </div>
-    </div>
-    <audio id="ai-response-audio" autoplay></audio>
-</div>
-
-<script src="file=webrtc_client.js"></script>
-<script>
-document.addEventListener('DOMContentLoaded', function() {
-    const micButton = document.getElementById('mic-button');
-    const statusIndicator = document.getElementById('status-indicator');
-    let isRecording = false;
-
-    // Initialize WebRTC
-    setupWebRTC().then(() => {
-        statusIndicator.textContent = 'Ready';
-    }).catch(error => {
-        console.error('WebRTC setup failed:', error);
-        statusIndicator.textContent = 'Microphone access failed';
-    });
-
-    // Toggle recording on button click
-    micButton.addEventListener('click', function() {
-        if (isRecording) {
-            stopRecording();
-            micButton.classList.remove('recording');
-            statusIndicator.textContent = 'Stopped';
-        } else {
-            startRecording();
-            micButton.classList.add('recording');
-            statusIndicator.textContent = 'Listening...';
-        }
-        isRecording = !isRecording;
-    });
-});
-
-// Function to update chat interface
-function updateChat(message, isUser) {
-    const chatMessages = document.getElementById('chat-messages');
-    const messageDiv = document.createElement('div');
-    messageDiv.className = isUser ? 'user-message' : 'ai-message';
-    messageDiv.textContent = message;
-    chatMessages.appendChild(messageDiv);
-    chatMessages.scrollTop = chatMessages.scrollHeight;
-}
-</script>
-
-<style>
-.container {
-    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
-    max-width: 800px;
-    margin: 0 auto;
-    padding: 20px;
-}
-
-.voice-chat-container {
-    background-color: #f8f9fa;
-    border-radius: 10px;
-    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
-    overflow: hidden;
-    display: flex;
-    flex-direction: column;
-    height: 500px;
-}
-
-.chat-messages {
-    flex: 1;
-    overflow-y: auto;
-    padding: 20px;
-}
-
-.user-message, .ai-message {
-    padding: 10px 15px;
-    border-radius: 18px;
-    margin-bottom: 10px;
-    max-width: 70%;
-    word-wrap: break-word;
-}
-
-.user-message {
-    background-color: #005fff;
-    color: white;
-    margin-left: auto;
-}
-
-.ai-message {
-    background-color: #e9e9eb;
-    color: #333;
-}
-
-.audio-controls {
-    padding: 15px;
-    display: flex;
-    align-items: center;
-    background-color: #ffffff;
-    border-top: 1px solid #e0e0e0;
-}
-
-.mic-button {
-    width: 60px;
-    height: 60px;
-    border-radius: 50%;
-    background-color: #f0f0f0;
-    border: none;
-    cursor: pointer;
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    transition: background-color 0.3s;
-}
-
-.mic-button:hover {
-    background-color: #e0e0e0;
-}
-
-.mic-button.recording {
-    background-color: #ff4b4b;
-}
-
-.mic-icon {
-    font-size: 24px;
-}
-
-.status-indicator {
-    margin-left: 15px;
-    color: #666;
-    font-size: 14px;
-}
-</style>
-"""
-
-# Create Gradio interface with custom HTML and CSS
-with gr.Blocks() as custom_interface:
-    gr.HTML(custom_html)
-    # Hidden chatbot to store conversation state
-    chatbot = gr.Chatbot(visible=False)
-
-# Create Stream with RTC configuration
+# Create Gradio interface
+chatbot = gr.Chatbot(type="messages")
+
 stream = Stream(
     modality="audio",
     mode="send-receive",
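
An aside on the new chatbot component: with type="messages", gr.Chatbot stores the transcript as OpenAI-style role/content dicts rather than the legacy list-of-pairs format, so the component's value can be handed to a chat-completions API without conversion. A small sketch of the expected value shape (the contents are illustrative):

# Value format for gr.Chatbot(type="messages")
history = [
    {"role": "user", "content": "Summarize yesterday's meeting."},
    {"role": "assistant", "content": "Sure, here are the main points..."},
]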
@@ -231,87 +76,17 @@ stream = Stream(
     additional_outputs_handler=lambda a, b: b,
     additional_inputs=[chatbot],
     additional_outputs=[chatbot],
-    ui_args={"title": "LLM Voice Chat (Powered by DeepSeek & ElevenLabs)"},
-    rtc_configuration=rtc_config
+    ui_args={"title": "LLM Voice Chat (Powered by DeepSeek & ElevenLabs)"}
 )
 
-# Create FastAPI app and mount stream and custom interface
-from fastapi import FastAPI, Request
-from fastapi.responses import HTMLResponse
-from fastapi.staticfiles import StaticFiles
-from fastapi.templating import Jinja2Templates
-from pathlib import Path
-
+# Create FastAPI app and mount stream
+from fastapi import FastAPI
 app = FastAPI()
-
-# Mount static files directory
-static_dir = Path(__file__).parent / "static"
-static_dir.mkdir(exist_ok=True)
-
-# Copy the WebRTC client JS to static directory
-webrtc_client_js = Path(__file__).parent / "fastrtc-voice-assistant" / "webrtc_client.js"
-if webrtc_client_js.exists():
-    with open(webrtc_client_js, "r") as src_file:
-        webrtc_js_content = src_file.read()
-
-    with open(static_dir / "webrtc_client.js", "w") as dest_file:
-        dest_file.write(webrtc_js_content)
-
-app.mount("/static", StaticFiles(directory=static_dir), name="static")
-
-# Mount the Stream's API endpoints
-stream.mount(app)
-
-# Serve the custom interface
-templates = Jinja2Templates(directory="templates")
-
-@app.get("/", response_class=HTMLResponse)
-async def get_root(request: Request):
-    # Create a simple template with our custom HTML
-    html_content = f"""
-    <!DOCTYPE html>
-    <html>
-    <head>
-        <title>LLM Voice Chat (Powered by DeepSeek & ElevenLabs)</title>
-        <meta charset="utf-8">
-        <meta name="viewport" content="width=device-width, initial-scale=1">
-        <script src="/static/webrtc_client.js"></script>
-    </head>
-    <body>
-        {custom_html}
-    </body>
-    </html>
-    """
-    return HTMLResponse(content=html_content)
-
-# Add WebRTC endpoints
-@app.post("/webrtc/offer")
-async def webrtc_offer(request: Request):
-    # Import the webrtc_handler
-    from fastrtc_voice_assistant.webrtc_handler import handle_offer
-
-    offer_data = await request.json()
-
-    # Define callback for audio processing
-    def audio_callback(frame):
-        # Process audio frame and feed to the STT model
-        # This is a simplified callback; you'll need to adapt this to your needs
-        pass
-
-    answer = await handle_offer(offer_data, audio_callback)
-    return answer
-
-@app.post("/webrtc/ice-candidate")
-async def webrtc_ice_candidate(request: Request):
-    from fastrtc_voice_assistant.webrtc_handler import add_ice_candidate
-
-    candidate_data = await request.json()
-    # You would need to get the appropriate PC here
-    pc = None  # This needs to be handled properly
-    await add_ice_candidate(candidate_data, pc)
-    return {"status": "ok"}
+app = gr.mount_gradio_app(app, stream.ui, path="/")
+stream.mount(app)  # Mount the stream for telephone/fastphone integration
 
 # Update the chat completion part based on available methods:
+# We'll use direct HTTP requests as a fallback since the API structure is unclear:
 def get_deepseek_response(messages):
     url = "https://api.deepseek.com/v1/chat/completions"
     headers = {
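
get_deepseek_response builds a plain chat-completions request against DeepSeek's OpenAI-compatible endpoint. A self-contained sketch of the shape of that call; the function name here is hypothetical, and the model id "deepseek-chat" and the timeout are assumptions, not taken from this diff:

import os
import requests

def deepseek_chat(messages):
    # Direct HTTP fallback: POST to the chat-completions endpoint.
    resp = requests.post(
        "https://api.deepseek.com/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {os.getenv('DEEPSEEK_API_KEY')}",
            "Content-Type": "application/json",
        },
        json={
            "model": "deepseek-chat",  # assumed model id
            "messages": messages,      # [{"role": ..., "content": ...}, ...]
        },
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]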
@@ -458,13 +233,25 @@ def text_to_speech(text):
     print(f"Exception in text_to_speech: {e}")
     yield None
 
+# Add this debug statement AFTER the function definition
+print("text_to_speech function:", inspect.getsource(text_to_speech))
+
 if __name__ == "__main__":
+    # HF Spaces configuration
     os.environ["GRADIO_SSR_MODE"] = "false"
 
-    # Check FastRTC version
+    # Check if running on Hugging Face Spaces
+    HF_SPACE = os.getenv("HF_SPACE", False)
+    PORT = int(os.getenv("PORT", 7860))
+
+    # Remove debug code in production
     import fastrtc
     print(f"FastRTC version: {fastrtc.__version__ if hasattr(fastrtc, '__version__') else 'unknown'}")
 
-    # Use a simpler startup method compatible with Hugging Face Spaces
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    # Launch with web interface (appropriate for Hugging Face Spaces)
+    stream.ui.launch(
+        server_name="0.0.0.0",  # Bind to all interfaces
+        server_port=PORT,
+        share=False,  # No need for sharing on HF Spaces
+        debug=False   # Disable debug in production
+    )
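
Note that __main__ now launches stream.ui directly, so the FastAPI app assembled above (with gr.mount_gradio_app and stream.mount) is only exercised if the file is served through an ASGI server instead; the added debug print also assumes inspect is imported earlier in app.py. A sketch of that alternative entrypoint, mirroring the uvicorn call this commit removed (app and stream are the objects built above):

import os
import uvicorn

if __name__ == "__main__":
    # Serve the FastAPI app, which has the Gradio UI mounted at "/"
    # and fastrtc's endpoints mounted via stream.mount(app).
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))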