# /// script
# dependencies = [
#     "fastrtc[vad, stt]==0.0.26.rc1",
#     "openai",
# ]
# ///
import gradio as gr
import huggingface_hub
from fastrtc import (
    AdditionalOutputs,
    ReplyOnPause,
    WebRTC,
    WebRTCData,
    WebRTCError,
    get_hf_turn_credentials,
    get_stt_model,
)
from gradio.utils import get_space
from openai import OpenAI

# Local speech-to-text model used to transcribe incoming audio.
stt_model = get_stt_model()

conversations = {}


def response(
    data: WebRTCData,
    conversation: list[dict],
    token: str | None = None,
    model: str = "meta-llama/Llama-3.2-3B-Instruct",
    provider: str = "sambanova",
):
    print("conversation before", conversation)
    # A token is only required for hosted inference providers; a custom
    # OpenAI-compatible base URL (e.g. a local Ollama server) needs none.
    if not provider.startswith("http") and not token:
        raise WebRTCError("Please add your HF token.")

    # Prefer spoken input when audio is present; otherwise fall back to
    # the text typed into the textbox.
    if data.audio is not None and data.audio[1].size > 0:
        user_audio_text = stt_model.stt(data.audio)
        conversation.append({"role": "user", "content": user_audio_text})
    else:
        conversation.append({"role": "user", "content": data.textbox})

    # Emit the updated conversation right away so the UI shows the user
    # message before the assistant reply arrives.
    yield AdditionalOutputs(conversation)

    if provider.startswith("http"):
        # Treat the provider string as an OpenAI-compatible base URL
        # (e.g. a local Ollama server, which ignores the API key).
        client = OpenAI(base_url=provider, api_key="ollama")
    else:
        client = huggingface_hub.InferenceClient(
            api_key=token,
            provider=provider,  # type: ignore
        )

    request = client.chat.completions.create(
        model=model,
        messages=conversation,  # type: ignore
        temperature=1,
        top_p=0.1,
    )
    response = {"role": "assistant", "content": request.choices[0].message.content}

    conversation.append(response)
    print("conversation after", conversation)
    yield AdditionalOutputs(conversation)


# Hide the Gradio footer.
css = """
footer {
    display: none !important;
}
"""

providers = [
    "black-forest-labs",
    "cerebras",
    "cohere",
    "fal-ai",
    "fireworks-ai",
    "hf-inference",
    "hyperbolic",
    "nebius",
    "novita",
    "openai",
    "replicate",
    "sambanova",
    "together",
]


def hide_token(provider: str):
    # The token textbox is unnecessary when pointing at a custom base URL.
    if provider.startswith("http"):
        return gr.Textbox(visible=False)
    return gr.skip()


with gr.Blocks(css=css) as demo:
    gr.HTML(
        """