import os
import time

import gradio as gr
import numpy as np
from dotenv import load_dotenv
from distil_whisper_fastrtc import get_stt_model
from fastapi import FastAPI
from fastrtc import (
    AdditionalOutputs,
    ReplyOnPause,
    Stream,
    get_tts_model,
    get_hf_turn_credentials,
)
from gradio.utils import get_space
from groq import Groq
from numpy.typing import NDArray

# Load variables from a local .env file before any client is constructed.
load_dotenv()

groq_client = Groq()
tts_model = get_tts_model()
stt_model = get_stt_model()
credentials = get_hf_turn_credentials(token=None)


# See "Talk to Claude" in Cookbook for an example of how to keep
# track of the chat history.
def response(
    audio: tuple[int, NDArray[np.int16 | np.float32]],
    chatbot: list[dict] | None = None,
):
    """Answer one spoken turn: transcribe, query the LLM, speak the reply.

    Args:
        audio: The captured utterance, passed unchanged to ``stt_model.stt``.
            NOTE(review): presumably ``(sample_rate, samples)`` -- confirm
            against the fastrtc handler contract.
        chatbot: Conversation so far as a list of dicts that each carry a
            ``"role"`` and a ``"content"`` key. ``None`` or an empty list
            starts a new conversation. A non-empty list is extended in place.

    Yields:
        ``AdditionalOutputs(chatbot)`` once the user's transcription has been
        appended, then every audio chunk produced by
        ``tts_model.stream_tts_sync``, and finally ``AdditionalOutputs(chatbot)``
        again with the assistant's reply included.
    """
    chatbot = chatbot or []
    # Forward only the keys the chat-completions call needs.
    messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments.
    start = time.perf_counter()
    text = stt_model.stt(audio)
    print("transcription", time.perf_counter() - start)
    print("prompt", text)

    chatbot.append({"role": "user", "content": text})
    # Publish the user's message right away, before the LLM call returns.
    yield AdditionalOutputs(chatbot)

    messages.append({"role": "user", "content": text})
    completion = groq_client.chat.completions.create(
        model="llama-3.1-8b-instant",
        max_tokens=512,
        messages=messages,  # type: ignore
    )
    # The message content may be None. Normalise it once so that neither the
    # stored history (which is sent back to the API on the next turn) nor the
    # TTS call ever receives None.
    response_text = completion.choices[0].message.content or ""

    chatbot.append({"role": "assistant", "content": response_text})

    # Convert response to audio using TTS model and stream each chunk out.
    yield from tts_model.stream_tts_sync(response_text)
    yield AdditionalOutputs(chatbot)


chatbot = gr.Chatbot(type="messages")
stream = Stream(
    modality="audio",
    mode="send-receive",
    handler=ReplyOnPause(response, input_sample_rate=16000),
    rtc_configuration=credentials,
    # Keep the second argument, i.e. the value carried by AdditionalOutputs.
    additional_outputs_handler=lambda a, b: b,
    additional_inputs=[chatbot],
    additional_outputs=[chatbot],
    # Limits are only applied when get_space() reports a Space.
    concurrency_limit=5 if get_space() else None,
    time_limit=90 if get_space() else None,
    ui_args={"title": "LLM Voice Chat (Powered by Groq, and WebRTC ⚡️)"},
)

# Mount the STREAM UI to the FastAPI app
# Because I don't want to build the UI manually
# Serve the Stream's auto-generated Gradio UI from the root path of a
# FastAPI application.
app = FastAPI()
app = gr.mount_gradio_app(app, stream.ui, path="/")

if __name__ == "__main__":
    # Disable Gradio's server-side rendering before the UI is started.
    os.environ["GRADIO_SSR_MODE"] = "false"

    # The UI is launched the same way whatever the MODE environment variable
    # holds, so no branching on it is needed.
    stream.ui.launch(server_port=7860)