import os
import gradio as gr
import google.generativeai as genai
from gtts import gTTS
import tempfile
# Configure the Gemini API: the key is read from the `gemini_api` environment
# variable (set it as a secret in your deployment environment)
GOOGLE_API_KEY = os.getenv("gemini_api")
genai.configure(api_key=GOOGLE_API_KEY)
# Initialize the model
model = genai.GenerativeModel('gemini-pro')
def transcribe_audio(audio_path):
    """
    Placeholder for speech-to-text transcription.
    A real application would call a proper STT service here (see the
    sketch after this function); for this demo we return a fixed message.
    """
    return ("This is a placeholder for speech-to-text transcription. In a real "
            "application, this would be the transcribed text from your audio.")
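# A minimal sketch of what a real implementation could look like, assuming the
# third-party SpeechRecognition package (pip install SpeechRecognition) and its
# free Google Web Speech endpoint; this is illustrative, not part of this app:
#
#   import speech_recognition as sr
#
#   def transcribe_audio(audio_path):
#       recognizer = sr.Recognizer()
#       with sr.AudioFile(audio_path) as source:  # expects WAV/AIFF/FLAC input
#           audio_data = recognizer.record(source)
#       return recognizer.recognize_google(audio_data)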
def text_to_speech(text):
    """Convert text to speech using gTTS and return the path to the MP3 file."""
    # delete=False keeps the file on disk after the handle closes so that
    # Gradio can serve it; the OS temp directory handles eventual cleanup.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        tts = gTTS(text=text, lang='en')
        tts.save(fp.name)
    return fp.name
def chat_with_gemini(user_input, history):
    """
    Process user input through the Gemini API and return the response.
    """
    # Initialize a new conversation or continue the existing one
    if not history:
        history = []
        chat = model.start_chat(history=[])
    else:
        # Reconstruct the chat session from history; turns alternate, so even
        # indices are the user's messages and odd indices are the model's
        chat = model.start_chat(history=[
            {"role": "user" if i % 2 == 0 else "model", "parts": [msg]}
            for i, msg in enumerate(history)
        ])
    # Generate the response
    response = chat.send_message(user_input)
    response_text = response.text
    # Update the history
    history.append(user_input)
    history.append(response_text)
    # Generate the audio response
    audio_path = text_to_speech(response_text)
    return response_text, history, audio_path
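# chat.send_message can fail at runtime (network, quota, safety blocks); a
# defensive variant of the call above, sketched under the assumption that the
# google.generativeai SDK raises ordinary exceptions on such failures:
#
#   try:
#       response = chat.send_message(user_input)
#   except Exception as exc:
#       return f"Sorry, the API call failed: {exc}", history, None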
def process_audio(audio, history):
    """Process audio input, convert it to text, and get a response."""
    if audio is None:
        return "No audio detected", history, None
    # Convert audio to text
    user_input = transcribe_audio(audio)
    # Get the response from Gemini
    response_text, new_history, audio_path = chat_with_gemini(user_input, history)
    return response_text, new_history, audio_path
def process_text(text_input, history):
    """Process text input and get a response."""
    if not text_input.strip():
        return "No input detected", history, None
    # Get the response from Gemini
    response_text, new_history, audio_path = chat_with_gemini(text_input, history)
    return response_text, new_history, audio_path
def display_history(history):
    """Format the conversation history for display."""
    if not history:
        return "No conversation history yet."
    display_text = ""
    # Entries alternate: even indices are the user, odd indices the assistant
    for i in range(0, len(history), 2):
        display_text += f"You: {history[i]}\n\n"
        if i + 1 < len(history):
            display_text += f"Assistant: {history[i+1]}\n\n"
    return display_text
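# Example: with history = ["Hi", "Hello! How can I help?"], display_history
# returns "You: Hi\n\nAssistant: Hello! How can I help?\n\n".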
# Create the Gradio interface
with gr.Blocks(title="Gemini Audio Chatbot") as demo:
    gr.Markdown("# Gemini Audio Chatbot")
    gr.Markdown("Talk or type your message, and the assistant will respond with text and audio.")
    # State for conversation history
    history = gr.State([])
    with gr.Row():
        with gr.Column(scale=7):
            # Chat history display
            chat_display = gr.Markdown("No conversation history yet.")
        with gr.Column(scale=3):
            # Info and instructions
            gr.Markdown("""
            ## How to use:
            1. Speak using the microphone or type your message
            2. Wait for the assistant's response
            3. The conversation history will be displayed on the left
            """)
    with gr.Row():
        # Text input
        text_input = gr.Textbox(
            placeholder="Type your message here...",
            label="Text Input"
        )
    with gr.Row():
        # Audio input
        audio_input = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Audio Input"
        )
    with gr.Row():
        # Assistant's response
        response_text = gr.Textbox(label="Assistant's Response")
    with gr.Row():
        # Audio output
        audio_output = gr.Audio(label="Assistant's Voice")
    # Buttons
    with gr.Row():
        clear_btn = gr.Button("Clear Conversation")
    # Event handlers
    text_input.submit(
        process_text,
        inputs=[text_input, history],
        outputs=[response_text, history, audio_output]
    ).then(
        display_history,
        inputs=[history],
        outputs=[chat_display]
    ).then(
        lambda: "",
        outputs=[text_input]
    )
    audio_input.change(
        process_audio,
        inputs=[audio_input, history],
        outputs=[response_text, history, audio_output]
    ).then(
        display_history,
        inputs=[history],
        outputs=[chat_display]
    )
    clear_btn.click(
        lambda: ([], "No conversation history yet.", "", None),
        outputs=[history, chat_display, response_text, audio_output]
    )

demo.launch()
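# demo.launch() serves the app locally; demo.launch(share=True) is the standard
# Gradio option for a temporary public link if you need to test remotely.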