# Gemini Audio Chatbot — Gradio app (Hugging Face Space entry point)
import os
import tempfile

import gradio as gr
from google import genai
from gtts import gTTS

# Configure the Gemini API client.
# NOTE(review): os.getenv returns None when the "gemini_api" secret is unset,
# in which case the client will fail on its first request — confirm the
# environment/secret is configured in the deployment.
GOOGLE_API_KEY = os.getenv("gemini_api")
client = genai.Client(api_key=GOOGLE_API_KEY)

# Lazily-created chat session, shared across requests (see chat_with_gemini).
chat = None
def transcribe_audio(audio_path):
    """Upload an audio file to Gemini and return its transcription text.

    On any failure the error is printed and the sentinel string
    "Error in transcription" is returned instead of raising.
    """
    try:
        print("Audio Path is", audio_path)
        # Upload the recording, then ask the model for a bare transcription.
        uploaded = client.files.upload(file=audio_path)
        result = client.models.generate_content(
            model='gemini-2.0-flash',
            contents=['Transcribe the input audio & return only the transcription.', uploaded],
        )
        print("Transcription Response:", result.text)
        return result.text
    except Exception as exc:
        print("Error in transcription:", str(exc))
        return "Error in transcription"
def text_to_speech(text):
    """Synthesize *text* to an mp3 with gTTS and return the temp file's path."""
    speech = gTTS(text=text, lang='en')
    # delete=False so the file survives for Gradio to stream back to the user.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as out:
        speech.save(out.name)
    return out.name
def chat_with_gemini(user_input, history):
    """Send *user_input* to the shared Gemini chat session.

    Returns a tuple of (response_text, updated_history, audio_file_path),
    where history is a list of (user, assistant) pairs.
    """
    global chat
    # Guard against missing/invalid history arriving from the UI state.
    # (isinstance already rejects None, so a separate None check is redundant.)
    if not isinstance(history, list):
        history = []
    # Create the chat session on first use; reuse it for the whole process.
    if chat is None:
        chat = client.chats.create(model="gemini-2.0-flash")
    print("User input:", user_input)
    reply = chat.send_message(user_input).text
    print("Response text:", reply)
    history.append((user_input, reply))
    # Voice the reply so the UI can play it back.
    return reply, history, text_to_speech(reply)
def process_audio(audio, history):
    """Transcribe a recorded clip and route the text through the chatbot."""
    if audio is None:
        # Nothing recorded: report it without touching the existing history.
        return "No audio detected", history, None
    transcript = transcribe_audio(audio)
    # chat_with_gemini already returns (text, history, audio_path).
    return chat_with_gemini(transcript, history)
def process_text(text_input, history):
    """Route typed input to the chatbot; blank submissions are ignored."""
    if not text_input.strip():
        return "No input detected", history, None
    # chat_with_gemini already returns (text, history, audio_path).
    return chat_with_gemini(text_input, history)
def display_history(history):
    """Render (user, assistant) pairs as a readable transcript string."""
    if not history:
        return "No conversation history yet."
    turns = [f"You: {user}\nAssistant: {assistant}\n" for user, assistant in history]
    return "\n".join(turns)
# Create the Gradio interface.
with gr.Blocks(title="Gemini Audio Chatbot") as demo:
    gr.Markdown("# Gemini Audio Chatbot")
    gr.Markdown("Talk or type your message, and the assistant will respond with text and audio.")

    # Per-session conversation state: a list of (user, assistant) tuples.
    history = gr.State([])

    with gr.Row():
        with gr.Column(scale=7):
            chat_display = gr.Markdown("No conversation history yet.")
        with gr.Column(scale=3):
            gr.Markdown("""
            ## How to use:
            1. Speak using the microphone or type your message
            2. Wait for the assistant's response
            3. The conversation history will be displayed on the left
            """)

    with gr.Row():
        audio_input = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Audio Input"
        )
    with gr.Row():
        response_text = gr.Textbox(label="Assistant's Response")
    with gr.Row():
        audio_output = gr.Audio(label="Assistant's Voice")

    with gr.Row():
        clear_btn = gr.Button("Clear Conversation")

    # A new recording triggers transcription + response, then the transcript
    # pane is refreshed from the updated history.
    audio_input.change(
        process_audio,
        inputs=[audio_input, history],
        outputs=[response_text, history, audio_output]
    ).then(
        display_history,
        inputs=[history],
        outputs=[chat_display]
    )

    # Reset state and all visible widgets in one shot.
    clear_btn.click(
        lambda: ([], "No conversation history yet.", "", None),
        outputs=[history, chat_display, response_text, audio_output]
    )

demo.launch()