# Listing metadata from the file viewer: file size 5,500 bytes, revision a67a358.
import os
import time
from typing import Iterator

import google.generativeai as genai
import gradio as gr
from gradio import ChatMessage
# Read the Gemini API key from the GEMINI_API_KEY environment variable
# (None when the variable is unset) and hand it to the client library.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

# Shared model handle used for every request: Gemini 2.0 Flash "thinking" variant.
model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
# Title attached (as metadata) to the assistant message that holds the model's thoughts.
_THINKING_TITLE = "⏳Thinking: *The thoughts produced by the model are experimental"


def _thinking_message(content: str) -> ChatMessage:
    """Build the assistant message that carries the model's thoughts so far."""
    return ChatMessage(
        role="assistant",
        content=content,
        metadata={"title": _THINKING_TITLE},
    )


def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
    """
    Stream the model's thoughts, then its answer, into the chat history.

    Thoughts accumulate in one assistant message that carries a metadata
    title; the answer accumulates in a second, plain assistant message.
    A streamed chunk with exactly two parts marks the hand-over: its first
    part is appended to the thoughts and its second part starts the answer.

    Args:
        user_message: Prompt text sent to the model. Only this text is sent;
            the contents of ``messages`` are not forwarded.
        messages: Chat history. It is mutated in place.

    Yields:
        The same ``messages`` list after every update.

    Any exception raised while requesting or streaming is logged with
    ``print`` and reported as an extra assistant message instead of being
    propagated to the caller.
    """
    try:
        # Log to stdout so users can follow how thinking and streaming interleave.
        print("\n=== New Request ===")
        print(f"User message: {user_message}")

        response = model.generate_content(user_message, stream=True)

        thought_buffer = ""
        response_buffer = ""
        thinking_complete = False

        # Placeholder thinking message; it is replaced as thought chunks arrive.
        messages.append(_thinking_message(""))

        for chunk in response:
            parts = chunk.candidates[0].content.parts
            if not parts:
                # A chunk without parts carries no text. Skip it rather than
                # let parts[0] raise IndexError and abort the whole stream.
                continue
            current_chunk = parts[0].text

            if len(parts) == 2 and not thinking_complete:
                # Hand-over chunk: final piece of the thoughts + first piece of the answer.
                thought_buffer += current_chunk
                print(f"\n=== Complete Thought ===\n{thought_buffer}")
                messages[-1] = _thinking_message(thought_buffer)
                yield messages

                response_buffer = parts[1].text
                print(f"\n=== Starting Response ===\n{response_buffer}")
                messages.append(
                    ChatMessage(role="assistant", content=response_buffer)
                )
                thinking_complete = True
                yield messages
                time.sleep(0.05)  # Small delay for visible streaming

            elif thinking_complete:
                # Answer is in progress: extend the last (answer) message.
                response_buffer += current_chunk
                print(f"\n=== Response Chunk ===\n{current_chunk}")
                messages[-1] = ChatMessage(role="assistant", content=response_buffer)
                yield messages

            else:
                # Still thinking: extend the last (thinking) message.
                thought_buffer += current_chunk
                print(f"\n=== Thinking Chunk ===\n{current_chunk}")
                messages[-1] = _thinking_message(thought_buffer)
                yield messages

        print(f"\n=== Final Response ===\n{response_buffer}")

    except Exception as e:
        # UI boundary: show the failure in the chat rather than raising out of the generator.
        print(f"\n=== Error ===\n{e}")
        messages.append(
            ChatMessage(
                role="assistant",
                content=f"I apologize, but I encountered an error: {e}",
            )
        )
        yield messages
def user_message(msg: str, history: list) -> tuple[str, list]:
    """Record the user's turn in the chat history.

    Appends a user-role ``ChatMessage`` containing ``msg`` to ``history``
    (in place) and returns an empty string together with that same list.
    """
    user_turn = ChatMessage(role="user", content=msg)
    history.append(user_turn)
    return "", history
# Build the Gradio UI: a heading, the chatbot pane, and a row holding the
# message box and the clear button, followed by the event wiring.
with gr.Blocks(fill_height=True, theme=gr.themes.Citrus()) as demo:
    gr.Markdown("# Chat with Gemini 2.0 Flash and See its Thoughts 💭")

    chatbot = gr.Chatbot(
        label="Gemini2.0 'Thinking' Chatbot",
        type="messages",
        scale=1,
        render_markdown=True,
        avatar_images=(
            None,
            "https://lh3.googleusercontent.com/oxz0sUBF0iYoN4VvhqWTmux-cxfD1rxuYkuFEfm1SFaseXEsjjE4Je_C_V3UQPuJ87sImQK3HfQ3RXiaRnQetjaZbjJJUkiPL5jFJ1WRl5FKJZYibUA=w214-h214-n-nu",
        ),
    )

    with gr.Row(equal_height=True):
        input_box = gr.Textbox(
            label="Chat Message",
            placeholder="Type your message here...",
            lines=1,
            scale=4,
        )
        clear_button = gr.Button("Clear Chat", scale=1)

    # Keeps the submitted text available to later steps after the textbox is cleared.
    msg_store = gr.State("")

    # Step 1: copy the textbox text into msg_store and blank the textbox.
    stored = input_box.submit(
        lambda msg: (msg, msg, ""),
        inputs=[input_box],
        outputs=[msg_store, input_box, input_box],
        queue=False,
    )

    # Step 2: append the stored text to the chat history as a user message.
    appended = stored.then(
        user_message,
        inputs=[msg_store, chatbot],
        outputs=[input_box, chatbot],
        queue=False,
    )

    # Step 3: stream the model's thoughts and answer into the chatbot.
    appended.then(
        stream_gemini_response,
        inputs=[msg_store, chatbot],
        outputs=chatbot,
    )

    # Reset the chat history, the textbox and the stored message.
    clear_button.click(
        lambda: ([], "", ""),
        outputs=[chatbot, input_box, msg_store],
        queue=False,
    )
# Launch the interface only when this file is executed as a script
# (not when it is imported); debug=True is passed through to launch().
if __name__ == "__main__":
    demo.launch(debug=True)