Spaces:

palbha
/

conversational_ai

Sleeping

App Files Files Community

palbha committed on Mar 18

Commit

9203946

verified ·

1 Parent(s): 1131a4e

Create app.py

Browse files

Files changed (1) hide show

app.py +161 -0

app.py ADDED Viewed

	@@ -0,0 +1,161 @@

+import os
+import gradio as gr
+from google import genai
+from gtts import gTTS
+import tempfile
+# Gemini client setup: the API key comes from the "gemini_api" environment variable.
+GOOGLE_API_KEY = os.environ.get("gemini_api")  # None when the variable is not set
+client = genai.Client(api_key=GOOGLE_API_KEY)
+def transcribe_audio(audio_path):
+    """
+    Transcribe the audio file using the Gemini API.
+
+    Args:
+        audio_path: Path of the audio file to upload for transcription.
+
+    Returns:
+        The transcription text with surrounding whitespace removed, or the
+        literal string "Error in transcription" when the upload or the
+        generation call fails or the model returns no text.
+    """
+    uploaded_file = None
+    try:
+        # Upload the audio file
+        uploaded_file = client.files.upload(file=audio_path)
+        # Send the file to Gemini for transcription
+        response = client.models.generate_content(
+            model='gemini-2.0-flash',
+            contents=['Transcribe the input audio & return only the transcription.', uploaded_file]
+        )
+        print("Transcription Response:", response.text)
+        transcription = (response.text or "").strip()
+        if not transcription:
+            # An empty reply is not a usable transcription; report it the same
+            # way as any other failure so callers only check one sentinel.
+            print("Error in transcription:", "empty response")
+            return "Error in transcription"
+        return transcription
+    except Exception as e:
+        print("Error in transcription:", str(e))
+        return "Error in transcription"
+    finally:
+        # The upload is only needed for this one request; remove it so files
+        # do not pile up in the account. Cleanup is best-effort and must never
+        # mask the transcription result.
+        if uploaded_file is not None:
+            try:
+                client.files.delete(name=uploaded_file.name)
+            except Exception as cleanup_error:
+                print("Could not delete uploaded audio:", str(cleanup_error))
+def text_to_speech(text):
+    """Convert text to speech using gTTS and return the path to the audio file.
+
+    Args:
+        text: The text to speak.
+
+    Returns:
+        Path of a newly created temporary ``.mp3`` file, or ``None`` when
+        ``text`` is ``None``, empty or whitespace-only (gTTS rejects such
+        input instead of producing audio).
+
+    Raises:
+        Exception: Whatever gTTS raises when synthesis fails; the temporary
+            file is removed before the error propagates.
+    """
+    if not text or not text.strip():
+        return None
+    # Reserve a unique .mp3 path and close the handle before gTTS opens the
+    # same path for writing; writing while the handle is still open fails on
+    # platforms that forbid opening a file twice (e.g. Windows).
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
+        audio_path = fp.name
+    try:
+        gTTS(text=text, lang='en').save(audio_path)
+    except Exception:
+        # Do not leave an orphaned empty temp file behind when synthesis fails.
+        if os.path.exists(audio_path):
+            os.remove(audio_path)
+        raise
+    return audio_path
+def _history_to_contents(history):
+    """Convert stored (user_text, assistant_text) turns into Gemini chat history contents."""
+    types = genai.types
+    contents = []
+    for turn in history:
+        user_text, assistant_text = str(turn[0] or ""), str(turn[1] or "")
+        # Skip incomplete turns as a pair so user/model roles keep alternating.
+        if not user_text.strip() or not assistant_text.strip():
+            continue
+        contents.append(types.Content(role="user", parts=[types.Part(text=user_text)]))
+        contents.append(types.Content(role="model", parts=[types.Part(text=assistant_text)]))
+    return contents
+def chat_with_gemini(user_input, history):
+    """
+    Process user input through Gemini API and return the response.
+
+    Args:
+        user_input: The user's message text.
+        history: List of (user_input, response_text) tuples from earlier
+            turns; anything that is not a list is treated as no history.
+
+    Returns:
+        A tuple ``(response_text, history, audio_path)``: the reply text, a
+        new history list with this turn appended, and the path of the spoken
+        reply (``None`` when the reply is empty).
+    """
+    if history is None or not isinstance(history, list):  # Ensure history is initialized
+        history = []
+    # The history only stores text tuples, not a chat object, so rebuild the
+    # chat session from the stored turns on every call. (Reusing history[-1]
+    # as the chat crashed on the second message, because it is a tuple.)
+    chat = client.chats.create(
+        model="gemini-2.0-flash",
+        history=_history_to_contents(history),
+    )
+    print("User input:", user_input)
+    # Generate response
+    response = chat.send_message(user_input)
+    response_text = response.text or ""  # .text can be None when no text is returned
+    print("Response text:", response_text)
+    # Append to a copy so the caller's list is not mutated in place
+    updated_history = [*history, (user_input, response_text)]
+    # Generate audio response; an empty reply has nothing to speak
+    audio_path = text_to_speech(response_text) if response_text.strip() else None
+    return response_text, updated_history, audio_path
+def process_audio(audio, history):
+    """Process audio input, convert to text, and get response.
+
+    Args:
+        audio: Path of the recorded audio file, or ``None`` when there is no
+            recording.
+        history: Conversation history passed through to ``chat_with_gemini``.
+
+    Returns:
+        A tuple ``(response_text, history, audio_path)``. When there is no
+        audio or the transcription fails, the history is returned unchanged
+        and ``audio_path`` is ``None``.
+    """
+    if audio is None:
+        return "No audio detected", history, None
+    # Convert audio to text
+    user_input = transcribe_audio(audio)
+    # transcribe_audio signals failure with this sentinel string; do not
+    # forward it (or an empty transcription) to Gemini as if the user said it.
+    if not user_input or user_input == "Error in transcription":
+        return "Error in transcription", history, None
+    # Get response from Gemini
+    return chat_with_gemini(user_input, history)
+def process_text(text_input, history):
+    """Process text input and get response.
+
+    Args:
+        text_input: The typed message; ``None``, empty or whitespace-only
+            input is rejected without calling Gemini.
+        history: Conversation history passed through to ``chat_with_gemini``.
+
+    Returns:
+        A tuple ``(response_text, history, audio_path)``. For rejected input
+        the history is returned unchanged and ``audio_path`` is ``None``.
+    """
+    # Guard None as well as blank text so .strip() is never called on None.
+    if text_input is None or not text_input.strip():
+        return "No input detected", history, None
+    # Get response from Gemini
+    return chat_with_gemini(text_input, history)
+def display_history(history):
+    """Render the conversation turns as "You:" / "Assistant:" text.
+
+    Returns a placeholder message when there are no turns; otherwise each
+    turn becomes a two-line entry and entries are joined with a newline.
+    """
+    if history:
+        rendered_turns = (
+            f"You: {turn[0]}\nAssistant: {turn[1]}\n" for turn in history
+        )
+        return "\n".join(rendered_turns)
+    return "No conversation history yet."
+# Create the Gradio interface
+# Contents, in creation order: title and description, the conversation
+# history next to usage notes, microphone input, text input, the latest reply
+# as text and as audio, and a button that clears the conversation.
+with gr.Blocks(title="Gemini Audio Chatbot") as demo:
+    gr.Markdown("# Gemini Audio Chatbot")
+    gr.Markdown("Talk or type your message, and the assistant will respond with text and audio.")
+    # State for conversation history
+    # chat_with_gemini appends one (user_input, response_text) tuple per turn.
+    history = gr.State([])  # Ensuring the history persists
+    with gr.Row():
+        with gr.Column(scale=7):
+            # Shows the text produced by display_history; starts with the
+            # same placeholder display_history returns for an empty history.
+            chat_display = gr.Markdown("No conversation history yet.")
+        with gr.Column(scale=3):
+            gr.Markdown("""
+            ## How to use:
+            1. Speak using the microphone or type your message
+            2. Wait for the assistant's response
+            3. The conversation history will be displayed on the left
+            """)
+    with gr.Row():
+        # type="filepath": process_audio receives the recording as a file
+        # path, which transcribe_audio uploads.
+        audio_input = gr.Audio(
+            sources=["microphone"],
+            type="filepath",
+            label="Audio Input"
+        )
+    with gr.Row():
+        text_input = gr.Textbox(label="Type your message here")
+    with gr.Row():
+        response_text = gr.Textbox(label="Assistant's Response")
+    with gr.Row():
+        audio_output = gr.Audio(label="Assistant's Voice")
+    # Buttons
+    with gr.Row():
+        clear_btn = gr.Button("Clear Conversation")
+    # Audio and Text Input Handling
+    # Both handlers return (response text, history, audio path), matching the
+    # three outputs; display_history then re-renders the history into
+    # chat_display.
+    audio_input.change(
+        process_audio,
+        inputs=[audio_input, history],
+        outputs=[response_text, history, audio_output]
+    ).then(
+        display_history,
+        inputs=[history],
+        outputs=[chat_display]
+    )
+    text_input.submit(
+        process_text,
+        inputs=[text_input, history],
+        outputs=[response_text, history, audio_output]
+    ).then(
+        display_history,
+        inputs=[history],
+        outputs=[chat_display]
+    )
+    # Clear conversation
+    # The lambda returns one value per output, in order: empty history,
+    # placeholder history text, empty response box, no audio.
+    clear_btn.click(
+        lambda: ([], "No conversation history yet.", "", None),
+        outputs=[history, chat_display, response_text, audio_output]
+    )
+# Not guarded by `if __name__ == "__main__"`, so the app launches whenever
+# this module is executed or imported.
+demo.launch()