Create app.py

app.py ADDED

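# Assumed dependencies: pip install gradio google-generativeai gTTS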
import os
import tempfile

import gradio as gr
import google.generativeai as genai
from gtts import gTTS

# Configure the Gemini API.
# The original read the key from Colab secrets (from google.colab import
# userdata; userdata.get('gemini_api')), which only works inside Colab;
# an environment variable works both locally and on Spaces.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

# Initialize the model
model = genai.GenerativeModel('gemini-pro')

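# Optional sketch (not in the original): generation defaults could be tuned
# when constructing the model; the values below are illustrative only.
#
# model = genai.GenerativeModel(
#     'gemini-pro',
#     generation_config=genai.types.GenerationConfig(
#         temperature=0.7,
#         max_output_tokens=1024,
#     ),
# )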
def transcribe_audio(audio_path):
    """
    Placeholder for speech-to-text transcription.
    In a real application, you would call a proper STT API here
    and return the transcribed text.
    """
    # For demonstration, return a fixed message instead of transcribing
    return "This is a placeholder for speech-to-text transcription. In a real application, this would be the transcribed text from your audio."

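# A minimal working alternative (an assumption, not part of the original app),
# using the SpeechRecognition package on the WAV file that Gradio's microphone
# input with type="filepath" typically produces:
def transcribe_audio_sr(audio_path):
    """Hypothetical helper: transcribe a WAV file via the free Google Web Speech API."""
    import speech_recognition as sr  # pip install SpeechRecognition
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio = recognizer.record(source)  # read the entire recording
    # May raise sr.UnknownValueError or sr.RequestError on failure
    return recognizer.recognize_google(audio)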
def text_to_speech(text):
    """Convert text to speech using gTTS and return the path to the audio file"""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        tts = gTTS(text=text, lang='en')
        tts.save(fp.name)
    return fp.name

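# Note: gTTS calls Google's TTS endpoint over HTTP, so this needs network
# access, and the temporary .mp3 files above are never deleted; a long-running
# deployment may want to clean them up after playback.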
def chat_with_gemini(user_input, history):
    """
    Process user input through Gemini API and return the response
    """
    # Initialize conversation or continue existing one
    if not history:
        history = []
        chat = model.start_chat(history=[])
    else:
        # Reconstruct the chat session with history
        chat = model.start_chat(history=[
            {"role": "user" if i % 2 == 0 else "model", "parts": [msg]}
            for i, msg in enumerate(history)
        ])

    # Generate response
    response = chat.send_message(user_input)
    response_text = response.text

    # Update history
    history.append(user_input)
    history.append(response_text)

    # Generate audio response
    audio_path = text_to_speech(response_text)

    return response_text, history, audio_path

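# The history state is a flat list of alternating user/model turns, e.g.
#     ["Hi", "Hello! How can I help?"]
# which the comprehension above maps onto the role/parts dictionaries that
# start_chat() expects.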
def process_audio(audio, history):
    """Process audio input, convert to text, and get response"""
    if audio is None:
        return "No audio detected", history, None

    # Convert audio to text
    user_input = transcribe_audio(audio)

    # Get response from Gemini
    response_text, new_history, audio_path = chat_with_gemini(user_input, history)

    return response_text, new_history, audio_path

def process_text(text_input, history):
    """Process text input and get response"""
    if not text_input.strip():
        return "No input detected", history, None

    # Get response from Gemini
    response_text, new_history, audio_path = chat_with_gemini(text_input, history)

    return response_text, new_history, audio_path

def display_history(history):
    """Format the history for display"""
    if not history:
        return "No conversation history yet."

    display_text = ""
    for i in range(0, len(history), 2):
        display_text += f"You: {history[i]}\n\n"
        if i + 1 < len(history):
            display_text += f"Assistant: {history[i+1]}\n\n"

    return display_text

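# For example, display_history(["Hi", "Hello there!"]) returns
#     "You: Hi\n\nAssistant: Hello there!\n\n"
# which the Markdown component renders as alternating turns.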
# Create the Gradio interface
with gr.Blocks(title="Gemini Audio Chatbot") as demo:
    gr.Markdown("# Gemini Audio Chatbot")
    gr.Markdown("Talk or type your message, and the assistant will respond with text and audio.")

    # State for conversation history
    history = gr.State([])

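    # gr.State holds a per-session value on the server between events, so each
    # browser session gets its own independent conversation history.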
    with gr.Row():
        with gr.Column(scale=7):
            # Chat history display
            chat_display = gr.Markdown("No conversation history yet.")

        with gr.Column(scale=3):
            # Info and instructions
            gr.Markdown("""
            ## How to use:
            1. Speak using the microphone or type your message
            2. Wait for the assistant's response
            3. The conversation history will be displayed on the left
            """)

    with gr.Row():
        # Text input
        text_input = gr.Textbox(
            placeholder="Type your message here...",
            label="Text Input"
        )

    with gr.Row():
        # Audio input
        audio_input = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Audio Input"
        )

    with gr.Row():
        # Assistant's response
        response_text = gr.Textbox(label="Assistant's Response")

    with gr.Row():
        # Audio output
        audio_output = gr.Audio(label="Assistant's Voice")

    # Buttons
    with gr.Row():
        clear_btn = gr.Button("Clear Conversation")

    # Event handlers
    text_input.submit(
        process_text,
        inputs=[text_input, history],
        outputs=[response_text, history, audio_output]
    ).then(
        display_history,
        inputs=[history],
        outputs=[chat_display]
    ).then(
        lambda: "",
        outputs=[text_input]
    )

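    # .then() steps run in order after the previous one finishes: the submit
    # handler updates the state and audio, then the transcript is re-rendered,
    # then the textbox is cleared.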
    audio_input.change(
        process_audio,
        inputs=[audio_input, history],
        outputs=[response_text, history, audio_output]
    ).then(
        display_history,
        inputs=[history],
        outputs=[chat_display]
    )

    clear_btn.click(
        lambda: ([], "No conversation history yet.", "", None),
        outputs=[history, chat_display, response_text, audio_output]
    )

demo.launch()
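# demo.launch() as written is sufficient on Hugging Face Spaces; when running
# in a notebook, demo.launch(share=True) gives a public link and debug=True
# surfaces errors inline.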