Spaces:

archit11
/

shuka_demo

Sleeping

App Files Files Community

shuka_demo / app.py

archit11

Update app.py

c591299 verified about 1 year ago

raw

history blame

2.61 kB

	import transformers
	import gradio as gr
	import librosa
	import torch
	import spaces
	import numpy as np

	# Module-level list of conversation turns. Nothing in the visible code reads
	# it; the history used by the callback travels through the gr.State values.
	conversation_history: list = []

	def _turns_to_chat_pairs(turns):
	    """Convert role/content turns into (user, bot) pairs for gr.Chatbot.

	    A user turn fills the user slot with a spoken-audio marker; any other
	    turn fills the bot slot with its content. The unused slot of each pair
	    is None so only one side is populated per row.
	    """
	    return [
	        ("🗣️ (Spoken Audio)", None) if turn['role'] == 'user' else (None, turn['content'])
	        for turn in turns
	    ]


	@spaces.GPU(duration=120)
	def transcribe_and_respond(audio_file, chat_history):
	    """Run one spoken user turn through the Shuka pipeline.

	    Args:
	        audio_file: Path of the recorded audio, or None when no recording
	            is available.
	        chat_history: List of ``{'role': ..., 'content': ...}`` turns from
	            earlier calls. It is not mutated.

	    Returns:
	        A ``(chat_pairs, turns)`` tuple: the conversation as (user, bot)
	        pairs for the Chatbot component, and the updated list of turns.
	        On failure the pairs end with an "Error: ..." row and the history
	        passed in is returned unchanged.
	    """
	    history = list(chat_history or [])

	    # The interface runs in live mode, so the callback may fire without a
	    # recording; show the existing conversation instead of failing.
	    if audio_file is None:
	        return _turns_to_chat_pairs(history), history

	    try:
	        # NOTE(review): the pipeline is rebuilt on every call; consider
	        # loading it once if the deployment allows it.
	        pipe = transformers.pipeline(
	            model='sarvamai/shuka_v1',
	            trust_remote_code=True,
	            device=0,
	            torch_dtype=torch.bfloat16,
	        )

	        # Load the audio file, resampled to 16 kHz
	        audio, sr = librosa.load(audio_file, sr=16000)

	        # Debug: Print audio properties for debugging
	        print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")

	        # Work on a copy so the caller's history stays intact if anything
	        # below raises. The placeholder must be the plain '<|audio|>' token
	        # (no backslashes).
	        # NOTE(review): earlier user turns also carry the audio placeholder
	        # while only the newest recording is passed -- confirm the model
	        # handles repeated placeholders in multi-turn use.
	        turns = list(history)
	        turns.append({'role': 'user', 'content': '<|audio|>'})

	        # Debug: Print the updated turns for debugging purposes
	        print(f"Updated turns: {turns}")

	        # Call the model with the updated conversation turns and audio
	        output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)

	        # Record the reply as an assistant turn; 'system' is reserved for
	        # instructions, not model responses.
	        turns.append({'role': 'assistant', 'content': output})

	        # Debug: Print the model's response
	        print(f"Model output: {output}")

	        return _turns_to_chat_pairs(turns), turns

	    except Exception as e:
	        # UI boundary: report the failure inside the chat window. The Chatbot
	        # output needs a list of pairs, so the message is added as a bot row
	        # rather than returned as a bare string.
	        print(f"Error while generating a response: {e!r}")
	        return _turns_to_chat_pairs(history) + [(None, f"Error: {str(e)}")], history

	# Gradio UI: microphone audio plus the hidden conversation history go in;
	# the rendered conversation and the updated history come out.
	iface = gr.Interface(
	    fn=transcribe_and_respond,
	    title="Shuka demo",
	    description="shuka live demo",
	    inputs=[
	        gr.Audio(sources="microphone", type="filepath", label="Your Audio (Microphone)"),
	        gr.State([]),  # hidden state holding the conversation history
	    ],
	    outputs=[
	        gr.Chatbot(label="Conversation History"),  # shows the conversation
	        gr.State([]),  # hidden state receiving the updated history
	    ],
	    allow_flagging="auto",
	    live=True,  # live mode for real-time interaction
	)

	# Launch the app only when this file is run as a script.
	if __name__ == "__main__":
	    iface.launch()