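"""Gradio demo: chat with Gemini 2.0 Flash Thinking and watch its thoughts.

The app streams the model's experimental "thinking" output into a titled
chat message, then streams the final answer beneath it.
"""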
import os
import time
import gradio as gr
from gradio import ChatMessage
from typing import Iterator
import google.generativeai as genai

# Get the Gemini API key from the GEMINI_API_KEY environment variable
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

# Use the experimental Gemini 2.0 Flash Thinking model
model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
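
# Optional sanity check (a sketch, not required for the demo): the thinking
# model is experimental, so you may want to confirm it is visible to your
# API key before launching, e.g.:
#
#   for m in genai.list_models():
#       if "generateContent" in m.supported_generation_methods:
#           print(m.name)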


def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
    """
    Streams LLM's thoughts and response.
    """
    try:
        # Log each request so users can see how thinking and streaming interleave
        print("\n=== New Request ===")
        print(f"User message: {user_message}")

        # Initialize response from Gemini
        response = model.generate_content(user_message, stream=True)

        # Buffers for the streamed text, plus a flag marking the thinking phase
        thought_buffer = ""
        response_buffer = ""
        thinking_complete = False

        # Add initial thinking message
        messages.append(
            ChatMessage(
                role="assistant",
                content="",
                metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"}
            )
        )

        for chunk in response:
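            # This handler assumes the thinking model's stream shape: while the
            # model is thinking, each chunk carries a single "thought" part; the
            # first chunk carrying two parts ends the thinking phase (parts[0])
            # and begins the visible answer (parts[1]).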
            parts = chunk.candidates[0].content.parts
            current_chunk = parts[0].text

            if len(parts) == 2 and not thinking_complete:
                # Complete thought
                thought_buffer += current_chunk
                print(f"\n=== Complete Thought ===\n{thought_buffer}")

                # Update thinking message
                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"}
                )
                yield messages
                
                # Start response
                response_buffer = parts[1].text
                print(f"\n=== Starting Response ===\n{response_buffer}")

                messages.append(
                    ChatMessage(
                        role="assistant",
                        content=response_buffer
                    )
                )

                thinking_complete = True
                yield messages
                time.sleep(0.05)  # Small delay for visible streaming

            elif thinking_complete:
                # Stream response
                response_buffer += current_chunk
                print(f"\n=== Response Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=response_buffer
                )
                yield messages
                
            else:
                # Stream thinking
                thought_buffer += current_chunk
                print(f"\n=== Thinking Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"}
                )
                yield messages

        # Log final complete response
        print(f"\n=== Final Response ===\n{response_buffer}")

    except Exception as e:
        print(f"\n=== Error ===\n{str(e)}")
        messages.append(
            ChatMessage(
                role="assistant",
                content=f"I apologize, but I encountered an error: {str(e)}"
            )
        )
        yield messages

def user_message(msg: str, history: list) -> tuple[str, list]:
    """Appends the user's message to the chat history and clears the input box."""
    history.append(ChatMessage(role="user", content=msg))
    return "", history

# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Citrus(), fill_height=True) as demo:
    gr.Markdown("# Chat with Gemini 2.0 Flash and See its Thoughts 💭")

    chatbot = gr.Chatbot(
        type="messages",
        label="Gemini2.0 'Thinking' Chatbot",
        render_markdown=True,
        scale=1,
        avatar_images=(None, "https://lh3.googleusercontent.com/oxz0sUBF0iYoN4VvhqWTmux-cxfD1rxuYkuFEfm1SFaseXEsjjE4Je_C_V3UQPuJ87sImQK3HfQ3RXiaRnQetjaZbjJJUkiPL5jFJ1WRl5FKJZYibUA=w214-h214-n-nu")
    )

    with gr.Row(equal_height=True):
        input_box = gr.Textbox(
            lines=1,
            label="Chat Message",
            placeholder="Type your message here...",
            scale=4
        )

        clear_button = gr.Button("Clear Chat", scale=1)

    # Set up event handlers
    msg_store = gr.State("")  # Store for preserving user message
    
    input_box.submit(
        lambda msg: (msg, ""),  # Snapshot the message and clear the input box
        inputs=[input_box],
        outputs=[msg_store, input_box],
        queue=False
    ).then(
        user_message,  # Add user message to chat
        inputs=[msg_store, chatbot],
        outputs=[input_box, chatbot],
        queue=False
    ).then(
        stream_gemini_response,  # Generate and stream response
        inputs=[msg_store, chatbot],
        outputs=chatbot
    )
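
    # The chain above runs in three steps: snapshot the message into msg_store
    # and clear the textbox, append the user turn to the chat, then stream the
    # assistant's thoughts and answer into the same Chatbot component.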

    clear_button.click(
        lambda: ([], "", ""),
        outputs=[chatbot, input_box, msg_store],
        queue=False
    )

# Launch the interface
if __name__ == "__main__":
    demo.launch(debug=True)
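
# A sketch of running this locally (assumes the file is saved as app.py and
# that the gradio and google-generativeai packages are installed):
#
#   export GEMINI_API_KEY="your-api-key"
#   python app.py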