# Gemini Audio Chatbot — Gradio app: speak (or type) a message; Gemini replies with text and gTTS audio.
import os
import gradio as gr
from google import genai
from gtts import gTTS
import tempfile
# Configure the Gemini API.
# The key is read from the "gemini_api" environment variable; os.getenv returns
# None if unset, in which case genai.Client is constructed with api_key=None
# and API calls will fail at request time.
GOOGLE_API_KEY = os.getenv("gemini_api")  # Ensure your API key is set
client = genai.Client(api_key=GOOGLE_API_KEY)
# Module-level chat session, created lazily on the first call to
# chat_with_gemini() so the model keeps conversational context across turns.
chat=None
def transcribe_audio(audio_path):
    """Upload the audio file at *audio_path* to Gemini and return its transcript.

    Returns the model's text on success, or the sentinel string
    "Error in transcription" if any step fails (best-effort, never raises).
    """
    try:
        print("Audio Path is", audio_path)
        # The Files API upload gives us a handle we can pass as content.
        uploaded = client.files.upload(file=audio_path)
        result = client.models.generate_content(
            model='gemini-2.0-flash',
            contents=['Transcribe the input audio & return only the transcription.', uploaded],
        )
        print("Transcription Response:", result.text)
        return result.text
    except Exception as exc:
        # Deliberate catch-all: the caller expects a string either way.
        print("Error in transcription:", str(exc))
        return "Error in transcription"
def text_to_speech(text):
    """Convert *text* to spoken English with gTTS and return the MP3 file path.

    The file is created with delete=False semantics (it must outlive this
    function so Gradio can stream it); the original implementation saved into
    the path while the NamedTemporaryFile handle was still open, which breaks
    on Windows where an open file is exclusively locked. Here the descriptor
    is closed before gTTS writes to the path.
    """
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # release the handle so gTTS can (re)open the path on any OS
    gTTS(text=text, lang='en').save(path)
    return path
def chat_with_gemini(user_input, history):
    """Send *user_input* to the shared Gemini chat session.

    Returns a tuple ``(response_text, history, audio_path)`` where *history*
    is the (possibly freshly created) list of (user, assistant) pairs and
    *audio_path* points at a gTTS rendering of the reply.
    """
    global chat

    # Gradio state can arrive as None or a non-list; normalize to a list.
    if not isinstance(history, list):
        history = []

    # Lazily create the session once so context persists across turns.
    if chat is None:
        chat = client.chats.create(model="gemini-2.0-flash")

    print("User input:", user_input)
    reply = chat.send_message(user_input).text
    print("Response text:", reply)

    history.append((user_input, reply))
    return reply, history, text_to_speech(reply)
def process_audio(audio, history):
    """Transcribe a recorded clip and route the text through the chatbot.

    Returns ``(response_text, history, audio_path)``; when no clip was
    captured the existing history is passed back untouched.
    """
    if audio is None:
        return "No audio detected", history, None  # don't reset history
    transcript = transcribe_audio(audio)
    return chat_with_gemini(transcript, history)
def process_text(text_input, history):
    """Route typed input through the chatbot.

    Returns ``(response_text, history, audio_path)``; blank/whitespace-only
    input is rejected without touching the history.
    """
    if not text_input.strip():
        return "No input detected", history, None
    return chat_with_gemini(text_input, history)
def display_history(history):
    """Render the (user, assistant) history as a readable transcript string."""
    if not history:
        return "No conversation history yet."
    turns = []
    for user_msg, bot_msg in history:
        turns.append(f"You: {user_msg}\nAssistant: {bot_msg}\n")
    return "\n".join(turns)
# Create the Gradio interface.
# Layout: transcript on the left, usage notes on the right, then mic input,
# the assistant's text reply, its spoken reply, and a clear button.
with gr.Blocks(title="Gemini Audio Chatbot") as demo:
    gr.Markdown("# Gemini Audio Chatbot")
    gr.Markdown("Talk or type your message, and the assistant will respond with text and audio.")

    # Per-session conversation state: a list of (user, assistant) tuples
    # threaded through every event handler so the history persists.
    history = gr.State([])

    with gr.Row():
        with gr.Column(scale=7):
            # Transcript panel, refreshed via display_history after each turn.
            chat_display = gr.Markdown("No conversation history yet.")
        with gr.Column(scale=3):
            gr.Markdown("""
            ## How to use:
            1. Speak using the microphone or type your message
            2. Wait for the assistant's response
            3. The conversation history will be displayed on the left
            """)

    with gr.Row():
        # type="filepath" hands process_audio a path on disk, which
        # transcribe_audio uploads to the Gemini Files API.
        audio_input = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Audio Input"
        )

    # Text input is currently disabled (see the commented wiring below).
    # with gr.Row():
    #     text_input = gr.Textbox(label="Type your message here")

    with gr.Row():
        response_text = gr.Textbox(label="Assistant's Response")
    with gr.Row():
        audio_output = gr.Audio(label="Assistant's Voice")

    # Buttons
    with gr.Row():
        clear_btn = gr.Button("Clear Conversation")

    # Audio input handling: run the chatbot turn, then re-render the transcript.
    audio_input.change(
        process_audio,
        inputs=[audio_input, history],
        outputs=[response_text, history, audio_output]
    ).then(
        display_history,
        inputs=[history],
        outputs=[chat_display]
    )

    # text_input.submit(
    #     process_text,
    #     inputs=[text_input, history],
    #     outputs=[response_text, history, audio_output]
    # ).then(
    #     display_history,
    #     inputs=[history],
    #     outputs=[chat_display]
    # )

    # Clear conversation: resets the UI state and transcript.
    # NOTE(review): the module-level `chat` session is NOT reset here, so the
    # model retains prior conversational context after clearing — confirm
    # whether that is intended.
    clear_btn.click(
        lambda: ([], "No conversation history yet.", "", None),
        outputs=[history, chat_display, response_text, audio_output]
    )

demo.launch()