Twelve2five committed on
Commit
01f7ec4
·
verified ·
1 Parent(s): 797af4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -67
app.py CHANGED
@@ -55,35 +55,35 @@ class DeepSeekAPI:
55
 
56
  deepseek_client = DeepSeekAPI(api_key=os.getenv("DEEPSEEK_API_KEY"))
57
 
58
- # Define handler function for FastRTC Stream
59
  def response(
60
  audio: tuple[int, np.ndarray],
61
- chatbot=None,
62
  ):
63
- # Initialize chatbot if None
64
  chatbot = chatbot or []
65
- messages = [{"role": msg[0], "content": msg[1]} for msg in chatbot] if chatbot else []
66
 
67
  # Convert speech to text
68
  text = stt_model.stt(audio)
69
  logger.info(f"User said: {text}")
70
 
71
  # Add user message to chat
72
- chatbot.append(("user", text))
73
  yield AdditionalOutputs(chatbot)
74
 
75
  # Get AI response
76
- formatted_messages = []
77
- for role, content in chatbot:
78
- formatted_messages.append({"role": "user" if role == "user" else "assistant", "content": content})
 
 
79
 
80
  # Call DeepSeek API
81
- response_data = deepseek_client.chat_completion(formatted_messages)
82
  response_text = response_data["choices"][0]["message"]["content"]
83
  logger.info(f"DeepSeek response: {response_text[:50]}...")
84
 
85
- # Add AI response to chat
86
- chatbot.append(("assistant", response_text))
 
87
 
88
  # Convert response to speech
89
  if os.getenv("ELEVENLABS_API_KEY"):
@@ -108,8 +108,6 @@ def response(
108
  # Fall back to gTTS
109
  logger.info("ElevenLabs API key not found, using gTTS...")
110
  yield from use_gtts_for_text(response_text)
111
-
112
- yield AdditionalOutputs(chatbot)
113
 
114
  def use_gtts_for_text(text):
115
  """Helper function to generate speech with gTTS for the entire text"""
@@ -158,38 +156,22 @@ def use_gtts_for_text(text):
158
  logger.error(f"gTTS error: {e}")
159
  yield None
160
 
161
- # Enhanced WebRTC configuration
162
  rtc_configuration = {
163
  "iceServers": [
164
  {"urls": ["stun:stun.l.google.com:19302"]},
165
- {"urls": ["stun:stun1.l.google.com:19302"]},
166
  {
167
  "urls": ["turn:openrelay.metered.ca:80"],
168
  "username": "openrelayproject",
169
  "credential": "openrelayproject"
170
- },
171
- {
172
- "urls": ["turn:openrelay.metered.ca:443?transport=tcp"],
173
- "username": "openrelayproject",
174
- "credential": "openrelayproject"
175
  }
176
- ],
177
- "iceCandidatePoolSize": 10
178
  }
179
 
180
- # Build the interface - we need separate Blocks for chatbot and Stream
181
- with gr.Blocks(title="LLM Voice Assistant") as demo:
182
- gr.Markdown("# LLM Voice Chat (Powered by DeepSeek & ElevenLabs)")
183
- gr.Markdown("Click the microphone button to start speaking")
184
-
185
- # Create the main chatbot display
186
- chatbot = gr.Chatbot(label="Conversation")
187
-
188
- # Create the Stream component outside of the Blocks context to avoid conflicts
189
- # We'll insert it into the interface later
190
- stream_container = gr.HTML("<div id='stream-placeholder'>Loading WebRTC component...</div>")
191
 
192
- # Create the FastRTC Stream separately
193
  stream = Stream(
194
  modality="audio",
195
  mode="send-receive",
@@ -197,40 +179,13 @@ stream = Stream(
197
  additional_outputs_handler=lambda a, b: b,
198
  additional_inputs=[chatbot],
199
  additional_outputs=[chatbot],
200
- rtc_configuration=rtc_configuration
 
201
  )
202
 
203
- # Custom mount function
204
- def mount_components():
205
- import gradio as gr
206
- import os
207
-
208
- # Get the main interface
209
- main_interface = demo
210
-
211
- # Add the Stream interface to a custom Blocks
212
- with gr.Blocks(analytics_enabled=False) as stream_interface:
213
- stream.render()
214
-
215
- # Create a custom app that hosts both interfaces on different routes
216
- app = gr.routes.App()
217
- app.add_route("/", main_interface)
218
- app.add_route("/stream", stream_interface)
219
-
220
- # Launch the combined app
221
- app.launch()
222
 
223
- # Launch with the mount function
224
  if __name__ == "__main__":
225
- # Local development
226
- demo.launch(share=True)
227
-
228
- # Launch the Stream component separately for local development
229
- stream.ui.launch(server_port=7861, share=True)
230
- else:
231
- # For Hugging Face Spaces
232
- # Initialize FastRTC in Spaces
233
- app = gr.mount_gradio_app(stream.app, demo, path="/")
234
-
235
- # Launch both components
236
- gr.launch_app(app)
 
55
 
56
  deepseek_client = DeepSeekAPI(api_key=os.getenv("DEEPSEEK_API_KEY"))
57
 
 
58
  def response(
59
  audio: tuple[int, np.ndarray],
60
+ chatbot: list[tuple] | None = None,
61
  ):
 
62
  chatbot = chatbot or []
 
63
 
64
  # Convert speech to text
65
  text = stt_model.stt(audio)
66
  logger.info(f"User said: {text}")
67
 
68
  # Add user message to chat
69
+ chatbot.append((text, None))
70
  yield AdditionalOutputs(chatbot)
71
 
72
  # Get AI response
73
+ messages = []
74
+ for user_text, assistant_text in chatbot:
75
+ messages.append({"role": "user", "content": user_text})
76
+ if assistant_text:
77
+ messages.append({"role": "assistant", "content": assistant_text})
78
 
79
  # Call DeepSeek API
80
+ response_data = deepseek_client.chat_completion(messages)
81
  response_text = response_data["choices"][0]["message"]["content"]
82
  logger.info(f"DeepSeek response: {response_text[:50]}...")
83
 
84
+ # Update chatbot with AI response
85
+ chatbot[-1] = (text, response_text)
86
+ yield AdditionalOutputs(chatbot)
87
 
88
  # Convert response to speech
89
  if os.getenv("ELEVENLABS_API_KEY"):
 
108
  # Fall back to gTTS
109
  logger.info("ElevenLabs API key not found, using gTTS...")
110
  yield from use_gtts_for_text(response_text)
 
 
111
 
112
  def use_gtts_for_text(text):
113
  """Helper function to generate speech with gTTS for the entire text"""
 
156
  logger.error(f"gTTS error: {e}")
157
  yield None
158
 
159
+ # Basic WebRTC configuration - just the minimum needed
160
  rtc_configuration = {
161
  "iceServers": [
162
  {"urls": ["stun:stun.l.google.com:19302"]},
 
163
  {
164
  "urls": ["turn:openrelay.metered.ca:80"],
165
  "username": "openrelayproject",
166
  "credential": "openrelayproject"
 
 
 
 
 
167
  }
168
+ ]
 
169
  }
170
 
171
+ # Create chatbot component for tracking conversation
172
+ chatbot = gr.Chatbot()
 
 
 
 
 
 
 
 
 
173
 
174
+ # Create Stream outside of any blocks context
175
  stream = Stream(
176
  modality="audio",
177
  mode="send-receive",
 
179
  additional_outputs_handler=lambda a, b: b,
180
  additional_inputs=[chatbot],
181
  additional_outputs=[chatbot],
182
+ rtc_configuration=rtc_configuration,
183
+ ui_args={"title": "LLM Voice Chat (DeepSeek & ElevenLabs)"}
184
  )
185
 
186
+ # Export the UI directly
187
+ demo = stream.ui
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
+ # Expose the demo for Hugging Face Spaces
190
  if __name__ == "__main__":
191
+ demo.launch()