# llm-voice-chat / app.py

from fastrtc import (
ReplyOnPause,
AdditionalOutputs,
Stream,
aggregate_bytes_to_16bit,
get_twilio_turn_credentials,
WebRTCError,
stt,
audio_to_bytes,
)
import os
import time

import gradio as gr
import numpy as np
from dotenv import load_dotenv
from elevenlabs import ElevenLabs
from fastapi import FastAPI
from gradio.utils import get_space
from groq import Groq

load_dotenv()

groq_client = Groq()  # reads GROQ_API_KEY from the environment by default
tts_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
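
# A minimal .env for running locally might look like this (the variable names
# are the defaults each SDK reads; the values are placeholders):
#
#   GROQ_API_KEY=...
#   ELEVENLABS_API_KEY=...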
# See "Talk to Claude" in Cookbook for an example of how to keep
# track of the chat history.
def response(
audio: tuple[int, np.ndarray],
chatbot: list[dict] | None = None,
):
try:
chatbot = chatbot or []
messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
start = time.time()
# text = stt(audio)
text = groq_client.audio.transcriptions.create(
file=("audio-file.mp3", audio_to_bytes(audio)),
model="whisper-large-v3-turbo",
response_format="verbose_json",
).text
print("transcription", time.time() - start)
print("prompt", text)
chatbot.append({"role": "user", "content": text})
yield AdditionalOutputs(chatbot)
messages.append({"role": "user", "content": text})
response_text = (
groq_client.chat.completions.create(
model="llama-3.1-8b-instant",
max_tokens=512,
messages=messages, # type: ignore
)
.choices[0]
.message.content
)
chatbot.append({"role": "assistant", "content": response_text})
iterator = tts_client.text_to_speech.convert_as_stream(
text=response_text, # type: ignore
voice_id="JBFqnCBsd6RMkjVDRZzb",
model_id="eleven_multilingual_v2",
output_format="pcm_24000",
)
for chunk in aggregate_bytes_to_16bit(iterator):
audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
yield (24000, audio_array)
yield AdditionalOutputs(chatbot)
except Exception as e:
import traceback
traceback.print_exc()
raise WebRTCError(traceback.format_exc())
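
# For reference, a ReplyOnPause handler is just a generator: it receives the
# recorded (sample_rate, samples) tuple plus any declared additional inputs,
# and yields (sample_rate, np.ndarray) audio chunks and/or AdditionalOutputs.
# A minimal sketch (hypothetical echo handler, for illustration only):
#
#     def echo(audio: tuple[int, np.ndarray]):
#         yield audio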


chatbot = gr.Chatbot(type="messages")
stream = Stream(
    modality="audio",
    mode="send-receive",
    handler=ReplyOnPause(response, input_sample_rate=16000),
    # Keep only the handler's latest chatbot value; discard the previous one.
    additional_outputs_handler=lambda a, b: b,
    additional_inputs=[chatbot],
    additional_outputs=[chatbot],
    # On Spaces, WebRTC traffic needs a TURN server; locally, defaults suffice.
    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
    concurrency_limit=20 if get_space() else None,
)
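
# get_twilio_turn_credentials() expects Twilio credentials in the environment
# (TWILIO_ACCOUNT_SID / TWILIO_AUTH_TOKEN, assuming fastrtc's documented
# defaults), so set those as secrets when deploying to a Space.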


# Swap the default header HTML for a custom title.
for block_id, block in stream.ui.blocks.items():
    if isinstance(block, gr.HTML):
        stream.ui.blocks[block_id] = gr.HTML(
            """
            <h1 style='text-align: center'>
            LLM Voice Chat (Powered by Groq, ElevenLabs, and WebRTC ⚡️)
            </h1>
            """
        )


# Mount the Stream's prebuilt Gradio UI onto a FastAPI app rather than
# building the frontend by hand.
app = FastAPI()
gr.mount_gradio_app(app, stream.ui, path="/")
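
# If only the WebRTC/signaling endpoints are needed without the Gradio
# frontend, the Stream can also be mounted directly (a sketch, assuming the
# stream.mount() API from the fastrtc docs):
#
#     stream.mount(app)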


if __name__ == "__main__":
    if (mode := os.getenv("MODE")) == "UI":
        stream.ui.launch(server_port=7860)
    elif mode == "PHONE":
        stream.fastphone(host="0.0.0.0", port=7860)
    else:
        import uvicorn

        uvicorn.run(app, host="0.0.0.0", port=7860)
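
# Usage (assuming this file is saved as app.py):
#   MODE=UI python app.py     -> launch the Gradio UI directly on port 7860
#   MODE=PHONE python app.py  -> serve over a temporary phone number (fastphone)
#   python app.py             -> run the FastAPI app with uvicorn on port 7860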