ysharma's picture
ysharma HF staff
Create app.py
a67a358 verified
raw
history blame
5.5 kB
import os
import gradio as gr
from gradio import ChatMessage
from typing import Iterator
import google.generativeai as genai
# get Gemini API Key from the environ variable
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)
# we will be using the Gemini 2.0 Flash model with Thinking capabilities
model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
"""
Streams LLM's thoughts and response.
"""
try:
# Enabling logging for users to understand how thinking works along with streaming
print(f"\n=== New Request ===")
print(f"User message: {user_message}")
# Initialize response from Gemini
response = model.generate_content(user_message, stream=True)
# Initialize buffers and flags
thought_buffer = ""
response_buffer = ""
has_response = False
thinking_complete = False
# Add initial thinking message
messages.append(
ChatMessage(
role="assistant",
content="",
metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"}
)
)
for chunk in response:
parts = chunk.candidates[0].content.parts
current_chunk = parts[0].text
if len(parts) == 2 and not thinking_complete:
# Complete thought
thought_buffer += current_chunk
print(f"\n=== Complete Thought ===\n{thought_buffer}")
# Update thinking message
messages[-1] = ChatMessage(
role="assistant",
content=thought_buffer,
metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"}
)
yield messages
# Start response
response_buffer = parts[1].text
print(f"\n=== Starting Response ===\n{response_buffer}")
messages.append(
ChatMessage(
role="assistant",
content=response_buffer
)
)
thinking_complete = True
has_response = True
yield messages
time.sleep(0.05) # Small delay for visible streaming
elif thinking_complete:
# Stream response
response_buffer += current_chunk
print(f"\n=== Response Chunk ===\n{current_chunk}")
messages[-1] = ChatMessage(
role="assistant",
content=response_buffer
)
yield messages
else:
# Stream thinking
thought_buffer += current_chunk
print(f"\n=== Thinking Chunk ===\n{current_chunk}")
messages[-1] = ChatMessage(
role="assistant",
content=thought_buffer,
metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"}
)
yield messages
# Log final complete response
print(f"\n=== Final Response ===\n{response_buffer}")
except Exception as e:
print(f"\n=== Error ===\n{str(e)}")
messages.append(
ChatMessage(
role="assistant",
content=f"I apologize, but I encountered an error: {str(e)}"
)
)
yield messages
def user_message(msg: str, history: list) -> tuple[str, list]:
"""Adds user message to chat history"""
history.append(ChatMessage(role="user", content=msg))
return "", history
# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Citrus(), fill_height=True) as demo:
#with gr.Column():
gr.Markdown("# Chat with Gemini 2.0 Flash and See its Thoughts 💭")
chatbot = gr.Chatbot(
type="messages",
label="Gemini2.0 'Thinking' Chatbot",
render_markdown=True,
scale=1,
avatar_images=(None,"https://lh3.googleusercontent.com/oxz0sUBF0iYoN4VvhqWTmux-cxfD1rxuYkuFEfm1SFaseXEsjjE4Je_C_V3UQPuJ87sImQK3HfQ3RXiaRnQetjaZbjJJUkiPL5jFJ1WRl5FKJZYibUA=w214-h214-n-nu")
)
with gr.Row(equal_height=True):
input_box = gr.Textbox(
lines=1,
label="Chat Message",
placeholder="Type your message here...",
scale=4
)
clear_button = gr.Button("Clear Chat", scale=1)
# Set up event handlers
msg_store = gr.State("") # Store for preserving user message
input_box.submit(
lambda msg: (msg, msg, ""), # Store message and clear input
inputs=[input_box],
outputs=[msg_store, input_box, input_box],
queue=False
).then(
user_message, # Add user message to chat
inputs=[msg_store, chatbot],
outputs=[input_box, chatbot],
queue=False
).then(
stream_gemini_response, # Generate and stream response
inputs=[msg_store, chatbot],
outputs=chatbot
)
clear_button.click(
lambda: ([], "", ""),
outputs=[chatbot, input_box, msg_store],
queue=False
)
# Launch the interface
if __name__ == "__main__":
demo.launch(debug=True)