# Gemini Audio Chatbot — Hugging Face Space
# (page-scrape residue "Spaces: Sleeping" replaced with a valid comment header)
| import os | |
| import gradio as gr | |
| from google import genai | |
| from gtts import gTTS | |
| import tempfile | |
# --- Gemini API configuration ---
# The key is read from the environment ("gemini_api" secret on Spaces).
# Client construction does not validate the key, so a missing or bad key
# only surfaces as an error on the first API call.
GOOGLE_API_KEY = os.getenv("gemini_api")
client = genai.Client(api_key=GOOGLE_API_KEY)

# Module-level chat session; created lazily on the first message so
# conversation context persists across turns (see chat_with_gemini).
chat = None
def transcribe_audio(audio_path):
    """Upload an audio file and ask Gemini to transcribe it.

    Args:
        audio_path: Filesystem path to the recorded audio clip.

    Returns:
        The transcription text, or the sentinel string
        "Error in transcription" on any failure (the error is printed
        for debugging rather than re-raised, so the UI never crashes).
    """
    try:
        print("Audio Path is", audio_path)
        uploaded = client.files.upload(file=audio_path)
        # Hand the uploaded file to the model together with the prompt.
        result = client.models.generate_content(
            model='gemini-2.0-flash',
            contents=['Transcribe the input audio & return only the transcription.', uploaded],
        )
        print("Transcription Response:", result.text)
        return result.text
    except Exception as exc:
        print("Error in transcription:", str(exc))
        return "Error in transcription"
def text_to_speech(text):
    """Synthesize *text* to an MP3 with gTTS and return the file path.

    The temporary file is created with delete=False so it outlives the
    context manager and can be served by the Gradio audio component;
    nothing here cleans it up afterwards.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        speech = gTTS(text=text, lang='en')
        speech.save(tmp.name)
        return tmp.name
def chat_with_gemini(user_input, history):
    """Send *user_input* to the shared Gemini chat session.

    Args:
        user_input: The user's message text.
        history: List of (user, assistant) tuples, or any non-list value
            (e.g. None from Gradio state), which is treated as empty.

    Returns:
        A (response_text, updated_history, audio_path) tuple, where
        audio_path points to an MP3 rendering of the reply.
    """
    global chat

    # Defensive: Gradio state can arrive as None or an unexpected type.
    history = history if isinstance(history, list) else []

    # Lazily create the chat session so model context persists across turns.
    if chat is None:
        chat = client.chats.create(model="gemini-2.0-flash")

    print("User input:", user_input)
    reply = chat.send_message(user_input).text
    print("Response text:", reply)

    history.append((user_input, reply))
    audio_path = text_to_speech(reply)
    return reply, history, audio_path
def process_audio(audio, history):
    """Transcribe a recorded clip and route it through the chatbot.

    Returns a (response_text, history, audio_path) tuple. When no audio
    was captured (the mic widget fires change with None on clear), the
    existing history is preserved untouched.
    """
    if audio is None:
        return "No audio detected", history, None

    transcript = transcribe_audio(audio)
    # chat_with_gemini already returns the (text, history, audio) triple.
    return chat_with_gemini(transcript, history)
def process_text(text_input, history):
    """Process typed input and return (response_text, history, audio_path).

    Empty, whitespace-only, or missing input short-circuits without
    touching the conversation state.
    """
    # Guard against None (a cleared textbox state) as well as blank
    # strings; the previous `text_input.strip()` check raised
    # AttributeError when text_input was None.
    if not text_input or not text_input.strip():
        return "No input detected", history, None

    return chat_with_gemini(text_input, history)
def display_history(history):
    """Render the conversation as alternating You/Assistant lines.

    Returns a placeholder message when there is no history yet.
    """
    if not history:
        return "No conversation history yet."

    parts = []
    for turn in history:
        # turn[0] is the user message, turn[1] the assistant reply.
        parts.append(f"You: {turn[0]}\nAssistant: {turn[1]}\n")
    return "\n".join(parts)
def reset_conversation():
    """Reset UI state AND drop the Gemini chat session.

    The previous clear handler (a lambda) only blanked the UI widgets;
    the module-level `chat` object kept the model's context alive, so a
    "cleared" conversation silently remembered earlier turns. Setting
    `chat = None` forces chat_with_gemini to start a fresh session.
    """
    global chat
    chat = None
    return [], "No conversation history yet.", "", None


# Create the Gradio interface
with gr.Blocks(title="Gemini Audio Chatbot") as demo:
    gr.Markdown("# Gemini Audio Chatbot")
    gr.Markdown("Talk or type your message, and the assistant will respond with text and audio.")

    # Per-session conversation history: list of (user, assistant) tuples.
    history = gr.State([])

    with gr.Row():
        with gr.Column(scale=7):
            chat_display = gr.Markdown("No conversation history yet.")
        with gr.Column(scale=3):
            gr.Markdown("""
            ## How to use:
            1. Speak using the microphone or type your message
            2. Wait for the assistant's response
            3. The conversation history will be displayed on the left
            """)

    with gr.Row():
        audio_input = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Audio Input"
        )
    with gr.Row():
        response_text = gr.Textbox(label="Assistant's Response")
    with gr.Row():
        audio_output = gr.Audio(label="Assistant's Voice")

    # Buttons
    with gr.Row():
        clear_btn = gr.Button("Clear Conversation")

    # Fires when a recording completes — and also when the widget is
    # cleared (audio is None); process_audio tolerates the None case.
    audio_input.change(
        process_audio,
        inputs=[audio_input, history],
        outputs=[response_text, history, audio_output]
    ).then(
        display_history,
        inputs=[history],
        outputs=[chat_display]
    )

    # Clear conversation: wipes widgets, state, and the model session.
    clear_btn.click(
        reset_conversation,
        outputs=[history, chat_display, response_text, audio_output]
    )

demo.launch()