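"""Gradio demo: chat with Gemini 2.0 Flash Thinking and watch its thoughts.

The app streams the model's experimental "thinking" output into a titled
chat message, then streams the final answer beneath it.
"""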
import os
import time
import gradio as gr
from gradio import ChatMessage
from typing import Iterator
import google.generativeai as genai

# Get the Gemini API key from the GEMINI_API_KEY environment variable
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

# Use the experimental Gemini 2.0 Flash Thinking model
model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
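
# Optional sanity check (a sketch, not required for the demo): the thinking
# model is experimental, so you may want to confirm it is visible to your
# API key before launching, e.g.:
#
#   for m in genai.list_models():
#       if "generateContent" in m.supported_generation_methods:
#           print(m.name)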


def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
    """
    Streams LLM's thoughts and response.
    """
    try:
        # Log each request so users can see how thinking and streaming interleave
        print("\n=== New Request ===")
        print(f"User message: {user_message}")

        # Initialize response from Gemini
        response = model.generate_content(user_message, stream=True)

        # Buffers for the streamed text, plus a flag marking the thinking phase
        thought_buffer = ""
        response_buffer = ""
        thinking_complete = False

        # Add initial thinking message
        messages.append(
            ChatMessage(
                role="assistant",
                content="",
                metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"}
            )
        )

        for chunk in response:
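            # This handler assumes the thinking model's stream shape: while the
            # model is thinking, each chunk carries a single "thought" part; the
            # first chunk carrying two parts ends the thinking phase (parts[0])
            # and begins the visible answer (parts[1]).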
            parts = chunk.candidates[0].content.parts
            current_chunk = parts[0].text

            if len(parts) == 2 and not thinking_complete:
                # Complete thought
                thought_buffer += current_chunk
                print(f"\n=== Complete Thought ===\n{thought_buffer}")

                # Update thinking message
                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"}
                )
                yield messages
                
                # Start response
                response_buffer = parts[1].text
                print(f"\n=== Starting Response ===\n{response_buffer}")

                messages.append(
                    ChatMessage(
                        role="assistant",
                        content=response_buffer
                    )
                )

                thinking_complete = True
                yield messages
                time.sleep(0.05)  # Small delay for visible streaming

            elif thinking_complete:
                # Stream response
                response_buffer += current_chunk
                print(f"\n=== Response Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=response_buffer
                )
                yield messages
                
            else:
                # Stream thinking
                thought_buffer += current_chunk
                print(f"\n=== Thinking Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"}
                )
                yield messages

        # Log final complete response
        print(f"\n=== Final Response ===\n{response_buffer}")

    except Exception as e:
        print(f"\n=== Error ===\n{str(e)}")
        messages.append(
            ChatMessage(
                role="assistant",
                content=f"I apologize, but I encountered an error: {str(e)}"
            )
        )
        yield messages

def user_message(msg: str, history: list) -> tuple[str, list]:
    """Appends the user's message to the chat history and clears the input box."""
    history.append(ChatMessage(role="user", content=msg))
    return "", history

# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Citrus(), fill_height=True) as demo:
    gr.Markdown("# Chat with Gemini 2.0 Flash and See its Thoughts 💭")

    chatbot = gr.Chatbot(
        type="messages",
        label="Gemini2.0 'Thinking' Chatbot",
        render_markdown=True,
        scale=1,
        avatar_images=(None, "https://lh3.googleusercontent.com/oxz0sUBF0iYoN4VvhqWTmux-cxfD1rxuYkuFEfm1SFaseXEsjjE4Je_C_V3UQPuJ87sImQK3HfQ3RXiaRnQetjaZbjJJUkiPL5jFJ1WRl5FKJZYibUA=w214-h214-n-nu")
    )

    with gr.Row(equal_height=True):
        input_box = gr.Textbox(
            lines=1,
            label="Chat Message",
            placeholder="Type your message here...",
            scale=4
        )

        clear_button = gr.Button("Clear Chat", scale=1)

    # Set up event handlers
    msg_store = gr.State("")  # Store for preserving user message
    
    input_box.submit(
        lambda msg: (msg, ""),  # Snapshot the message and clear the input box
        inputs=[input_box],
        outputs=[msg_store, input_box],
        queue=False
    ).then(
        user_message,  # Add user message to chat
        inputs=[msg_store, chatbot],
        outputs=[input_box, chatbot],
        queue=False
    ).then(
        stream_gemini_response,  # Generate and stream response
        inputs=[msg_store, chatbot],
        outputs=chatbot
    )
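
    # The chain above runs in three steps: snapshot the message into msg_store
    # and clear the textbox, append the user turn to the chat, then stream the
    # assistant's thoughts and answer into the same Chatbot component.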

    clear_button.click(
        lambda: ([], "", ""),
        outputs=[chatbot, input_box, msg_store],
        queue=False
    )

# Launch the interface
if __name__ == "__main__":
    demo.launch(debug=True)
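
# A sketch of running this locally (assumes the file is saved as app.py and
# that the gradio and google-generativeai packages are installed):
#
#   export GEMINI_API_KEY="your-api-key"
#   python app.py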