# /// script
# dependencies = [
#     "fastrtc[vad, stt]==0.0.26.rc1",
#     "openai",
# ]
# ///
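
# Run with e.g. `uv run app.py`: uv reads the inline script metadata above
# and installs the pinned dependencies before launching the demo.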
import gradio as gr
import huggingface_hub
from fastrtc import (
    AdditionalOutputs,
    ReplyOnPause,
    WebRTC,
    WebRTCData,
    WebRTCError,
    get_hf_turn_credentials,
    get_stt_model,
)
from gradio.utils import get_space
from openai import OpenAI
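
# Load the local speech-to-text model once at startup.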
stt_model = get_stt_model()

conversations = {}
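

# Handler run by ReplyOnPause: transcribe the user's audio (or take the typed
# textbox contents), append the turn to the history, then ask the selected LLM
# for a reply. Each `yield AdditionalOutputs(...)` pushes the updated history
# to the chatbot.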
def response(
    data: WebRTCData,
    conversation: list[dict],
    token: str | None = None,
    model: str = "meta-llama/Llama-3.2-3B-Instruct",
    provider: str = "sambanova",
):
print("conversation before", conversation) | |
if not provider.startswith("http") and not token: | |
raise WebRTCError("Please add your HF token.") | |
if data.audio is not None and data.audio[1].size > 0: | |
user_audio_text = stt_model.stt(data.audio) | |
conversation.append({"role": "user", "content": user_audio_text}) | |
else: | |
conversation.append({"role": "user", "content": data.textbox}) | |
yield AdditionalOutputs(conversation) | |
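
    # Route to a self-hosted OpenAI-compatible server (e.g. Ollama) when a URL
    # was typed in, otherwise to a Hugging Face inference provider.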
    if provider.startswith("http"):
        client = OpenAI(base_url=provider, api_key="ollama")
    else:
        client = huggingface_hub.InferenceClient(
            api_key=token,
            provider=provider,  # type: ignore
        )
    completion = client.chat.completions.create(
        model=model,
        messages=conversation,  # type: ignore
        temperature=1,
        top_p=0.1,
    )
    assistant_message = {
        "role": "assistant",
        "content": completion.choices[0].message.content,
    }
    conversation.append(assistant_message)
    print("conversation after", conversation)
    yield AdditionalOutputs(conversation)
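

# Hide the default Gradio footer.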
css = """ | |
footer { | |
display: none !important; | |
} | |
""" | |
providers = [
    "black-forest-labs",
    "cerebras",
    "cohere",
    "fal-ai",
    "fireworks-ai",
    "hf-inference",
    "hyperbolic",
    "nebius",
    "novita",
    "openai",
    "replicate",
    "sambanova",
    "together",
]
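

# Self-hosted endpoints need no HF token, so hide the token field for URLs.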
def hide_token(provider: str):
    if provider.startswith("http"):
        return gr.Textbox(visible=False)
    # Show the field again when switching back to a hosted provider.
    return gr.Textbox(visible=True)


with gr.Blocks(css=css) as demo:
    gr.HTML(
        """
        <h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
            <img src="https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/AV_Huggy.png" alt="Streaming Huggy" style="height: 50px; margin-right: 10px"> FastRTC Chat
        </h1>
        """
    )
    with gr.Sidebar():
        token = gr.Textbox(
            placeholder="Place your HF token here", type="password", label="HF Token"
        )
        model = gr.Dropdown(
            choices=["meta-llama/Llama-3.2-3B-Instruct"],
            allow_custom_value=True,
            label="Model",
        )
        provider = gr.Dropdown(
            label="Provider",
            choices=providers,
            value="sambanova",
            info="Select an HF-compatible provider or type the URL of your own server, e.g. http://127.0.0.1:11434/v1 for Ollama",
            allow_custom_value=True,
        )
        provider.change(hide_token, inputs=[provider], outputs=[token])
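
    # Chat history; the handler streams updates back via AdditionalOutputs.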
    cb = gr.Chatbot(type="messages", height=600)
    webrtc = WebRTC(
        modality="audio",
        mode="send",
        variant="textbox",
        rtc_configuration=get_hf_turn_credentials if get_space() else None,
        server_rtc_configuration=get_hf_turn_credentials(ttl=3_600 * 24 * 30)
        if get_space()
        else None,
    )
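    # ReplyOnPause calls response() whenever the caller stops speaking
    # (or submits the textbox, since variant="textbox").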
    webrtc.stream(
        ReplyOnPause(response),  # type: ignore
        inputs=[webrtc, cb, token, model, provider],
        outputs=[cb],
        concurrency_limit=100,
    )
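    # Each AdditionalOutputs yield replaces the chatbot contents with the
    # updated conversation.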
    webrtc.on_additional_outputs(
        lambda old, new: new, inputs=[cb], outputs=[cb], concurrency_limit=100
    )

if __name__ == "__main__":
    demo.launch(server_port=7860)