# Source: Hugging Face file view (uploader: palbha)
# Commit message: Rename app.py to app1.py
# Commit: 1131a4e (verified)
import os
import gradio as gr
from google import genai
from gtts import gTTS
import tempfile
import time
# Configure the Gemini API.
# The key is read from the `gemini_api` environment variable; os.getenv
# returns None when that variable is not set.
GOOGLE_API_KEY = os.getenv("gemini_api") # Set the `gemini_api` environment variable to your API key
# Shared client used by the transcription and chat functions below.
client = genai.Client(api_key=GOOGLE_API_KEY)
def transcribe_audio(audio_path):
    """
    Transcribe an audio file to text with the Gemini model.

    The recording is uploaded through the Gemini Files API and the returned
    file handle is sent together with the prompt. Passing the bare path
    string in ``contents`` would only send the path as text, so the model
    would never receive the audio itself.

    Args:
        audio_path: Filesystem path of the recording to transcribe.

    Returns:
        The transcription with surrounding whitespace removed, or an empty
        string when the model returned no text.
    """
    prompt = (
        'Transcribe the input audio & return the transcription only '
        'Example - Audio file is transcribed to Hello then just return Hello'
    )
    # Upload the audio so the request references the actual media.
    uploaded_audio = client.files.upload(file=audio_path)
    response = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=[prompt, uploaded_audio],
    )
    # response.text can be None (e.g. no text part in the reply).
    transcription = (response.text or "").strip()
    print(transcription)
    return transcription
def text_to_speech(text):
    """
    Convert text to speech using gTTS and return the path to the audio file.

    Args:
        text: The text to synthesize.

    Returns:
        Path of a newly created ``.mp3`` temp file, or ``None`` when ``text``
        is empty / whitespace-only (there is nothing to speak).

    Raises:
        Whatever gTTS raises while synthesizing or saving; the temp file is
        removed first so failed calls do not leave empty files behind.
    """
    if not text or not text.strip():
        return None
    # Reserve a unique file name, then close the handle BEFORE gTTS writes:
    # on Windows a NamedTemporaryFile cannot be reopened by name while it is
    # still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        audio_path = fp.name
    try:
        gTTS(text=text, lang='en').save(audio_path)
    except Exception:
        # Clean up the reserved file, then let the caller see the error.
        os.remove(audio_path)
        raise
    return audio_path
def chat_with_gemini(user_input, history):
    """
    Process user input through the Gemini API and return the response.

    A chat session is created per call and seeded with the previous turns,
    so the model sees the whole conversation instead of only the latest
    message.

    Args:
        user_input: The user's message text.
        history: Flat list of earlier messages in alternating order
            ``[user, assistant, user, assistant, ...]``; may be empty/None.

    Returns:
        A tuple ``(response_text, updated_history, audio_path)``.
        ``updated_history`` is a new list (the argument is not mutated) and
        ``audio_path`` is ``None`` when there is no text to speak.
    """
    # Function-scope import: only this function needs the typed wrappers.
    from google.genai import types

    # Work on a copy so the caller's list is never modified in place.
    turns = list(history) if history else []

    # Even positions are user messages, odd positions are model replies.
    # Empty entries are skipped so no blank text parts are sent to the API.
    past_turns = [
        types.Content(
            role="user" if position % 2 == 0 else "model",
            parts=[types.Part(text=str(message))],
        )
        for position, message in enumerate(turns)
        if message
    ]
    chat = client.chats.create(model="gemini-2.0-flash", history=past_turns)
    print("History is", turns)
    print("User input is ", user_input)

    # Generate response; response.text can be None when no text came back.
    response = chat.send_message(user_input)
    response_text = response.text or ""
    print("Response text is ", response_text)

    # Update history
    turns.append(user_input)
    turns.append(response_text)

    # Generate audio response only when there is something to say.
    audio_path = text_to_speech(response_text) if response_text.strip() else None
    return response_text, turns, audio_path
def process_audio(audio, history):
    """
    Handle a recording: transcribe it, then fetch the assistant's reply.

    Returns a ``(response_text, history, audio_path)`` tuple. When no
    recording is supplied the history is passed back untouched together with
    a notice and no audio.
    """
    if audio is not None:
        # Speech -> text, then text -> Gemini reply (+ spoken version).
        transcript = transcribe_audio(audio)
        reply, updated_history, speech_file = chat_with_gemini(transcript, history)
        return reply, updated_history, speech_file
    return "No audio detected", history, None
def process_text(text_input, history):
    """
    Process text input and get the assistant's response.

    Args:
        text_input: The typed message; ``None`` is treated like an empty
            message instead of raising ``AttributeError`` on ``.strip()``.
        history: Current conversation history, forwarded to
            ``chat_with_gemini``.

    Returns:
        A tuple ``(response_text, history, audio_path)``. For missing or
        whitespace-only input the history is returned unchanged with a
        notice and no audio.
    """
    if text_input is None or not text_input.strip():
        return "No input detected", history, None
    # Get response from Gemini
    response_text, new_history, audio_path = chat_with_gemini(text_input, history)
    return response_text, new_history, audio_path
def display_history(history):
    """
    Render the flat conversation history as display text.

    Entries at even positions are shown as the user's messages and entries
    at odd positions as the assistant's replies; each entry is followed by a
    blank line. An empty history yields a fixed placeholder message.
    """
    if not history:
        return "No conversation history yet."
    rendered = []
    for position, message in enumerate(history):
        speaker = "Assistant" if position % 2 else "You"
        rendered.append(f"{speaker}: {message}\n\n")
    return "".join(rendered)
# Create the Gradio interface.
# Layout: history + instructions on top, then the audio and text inputs,
# the assistant's text reply, its spoken reply, and a clear button.
with gr.Blocks(title="Gemini Audio Chatbot") as demo:
    gr.Markdown("# Gemini Audio Chatbot")
    gr.Markdown("Talk or type your message, and the assistant will respond with text and audio.")

    # State for conversation history (flat list: user, assistant, user, ...)
    history = gr.State([])

    with gr.Row():
        with gr.Column(scale=7):
            # Chat history display
            chat_display = gr.Markdown("No conversation history yet.")
        with gr.Column(scale=3):
            # Info and instructions
            gr.Markdown(
                "\n## How to use:\n"
                "1. Speak using the microphone or type your message\n"
                "2. Wait for the assistant's response\n"
                "3. The conversation history will be displayed on the left\n"
            )

    with gr.Row():
        # Audio input
        audio_input = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Audio Input"
        )

    with gr.Row():
        # Text input: the instructions promise typing, so provide a box and
        # route it through process_text (press Enter to send).
        text_box = gr.Textbox(
            label="Text Input",
            placeholder="Type your message and press Enter"
        )

    with gr.Row():
        # Assistant's response
        response_text = gr.Textbox(label="Assistant's Response")

    with gr.Row():
        # Audio output
        audio_output = gr.Audio(label="Assistant's Voice")

    # Buttons
    with gr.Row():
        clear_btn = gr.Button("Clear Conversation")

    # A new/changed recording is processed, then the history view is redrawn.
    audio_input.change(
        process_audio,
        inputs=[audio_input, history],
        outputs=[response_text, history, audio_output]
    ).then(
        display_history,
        inputs=[history],
        outputs=[chat_display]
    )

    # Typed messages follow the same two-step flow as recordings.
    text_box.submit(
        process_text,
        inputs=[text_box, history],
        outputs=[response_text, history, audio_output]
    ).then(
        display_history,
        inputs=[history],
        outputs=[chat_display]
    )

    # Reset the state, the history view, both text boxes and the audio output.
    clear_btn.click(
        lambda: ([], "No conversation history yet.", "", None, ""),
        outputs=[history, chat_display, response_text, audio_output, text_box]
    )

demo.launch()