Spaces:

ysharma
/

Gemini2-Flash-Thinking

Running

App Files Files Community

Gemini2-Flash-Thinking / app.py

ysharma HF Staff

Create app.py

a67a358 verified 7 months ago

raw

history blame

5.5 kB

	import os
	import time
	from typing import Iterator

	import google.generativeai as genai
	import gradio as gr
	from gradio import ChatMessage

	# The Gemini API key is supplied through the GEMINI_API_KEY environment
	# variable and handed to the SDK once, at import time.
	GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
	genai.configure(api_key=GEMINI_API_KEY)

	# Single shared model handle: Gemini 2.0 Flash with Thinking capabilities.
	model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")


	# Title shown on the "thinking" bubble that holds the model's thoughts.
	_THINKING_TITLE = "⏳Thinking: *The thoughts produced by the model are experimental"


	def _thinking_message(content: str) -> "ChatMessage":
	    """Build the assistant message that displays the thoughts gathered so far."""
	    return ChatMessage(
	        role="assistant",
	        content=content,
	        metadata={"title": _THINKING_TITLE},
	    )


	def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
	    """
	    Streams LLM's thoughts and response.

	    Sends ``user_message`` to the module-level ``model`` with streaming
	    enabled. Chunks are accumulated into a "thinking" message until a chunk
	    with exactly two parts arrives; that chunk's first part completes the
	    thought and its second part starts the answer, which later chunks extend.

	    Args:
	        user_message: Prompt text forwarded to the model.
	        messages: Chat history. It is mutated in place: a thinking message
	            is appended first, then (once thinking completes) an answer
	            message. On failure an apology message is appended instead.

	    Yields:
	        The same ``messages`` list after every update, so the UI can
	        re-render the conversation incrementally.
	    """
	    try:
	        # Logging is left on so users can follow how thinking and streaming interleave.
	        print("\n=== New Request ===")
	        print(f"User message: {user_message}")

	        response = model.generate_content(user_message, stream=True)

	        thought_buffer = ""
	        response_buffer = ""
	        thinking_complete = False

	        # Placeholder bubble that is overwritten as thought chunks arrive.
	        messages.append(_thinking_message(""))

	        for chunk in response:
	            parts = chunk.candidates[0].content.parts
	            if not parts:
	                # Nothing to display; indexing parts[0] below would raise IndexError.
	                continue
	            current_chunk = parts[0].text

	            if len(parts) == 2 and not thinking_complete:
	                # Transition chunk: first part ends the thought, second starts the answer.
	                thought_buffer += current_chunk
	                print(f"\n=== Complete Thought ===\n{thought_buffer}")

	                messages[-1] = _thinking_message(thought_buffer)
	                yield messages

	                response_buffer = parts[1].text
	                print(f"\n=== Starting Response ===\n{response_buffer}")

	                messages.append(
	                    ChatMessage(role="assistant", content=response_buffer)
	                )

	                thinking_complete = True
	                yield messages
	                # Small delay for visible streaming. Requires the file-level
	                # `import time`; without it this line raised NameError, which
	                # the handler below turned into a spurious error reply.
	                time.sleep(0.05)

	            elif thinking_complete:
	                # Answer phase: extend the answer bubble.
	                response_buffer += current_chunk
	                print(f"\n=== Response Chunk ===\n{current_chunk}")

	                messages[-1] = ChatMessage(role="assistant", content=response_buffer)
	                yield messages

	            else:
	                # Thinking phase: extend the thinking bubble.
	                thought_buffer += current_chunk
	                print(f"\n=== Thinking Chunk ===\n{current_chunk}")

	                messages[-1] = _thinking_message(thought_buffer)
	                yield messages

	        # Log final complete response
	        print(f"\n=== Final Response ===\n{response_buffer}")

	    except Exception as e:
	        # Top-level boundary for the UI: report the failure in the chat
	        # instead of letting the generator crash the event handler.
	        print(f"\n=== Error ===\n{e}")
	        messages.append(
	            ChatMessage(
	                role="assistant",
	                content=f"I apologize, but I encountered an error: {e}",
	            )
	        )
	        yield messages

	def user_message(msg: str, history: list) -> tuple[str, list]:
	    """
	    Record the user's turn in the chat history.

	    Appends a user-role ``ChatMessage`` carrying ``msg`` to ``history``
	    (in place) and returns an empty string together with that same list.
	    """
	    user_turn = ChatMessage(content=msg, role="user")
	    history.append(user_turn)
	    cleared_text = ""
	    return cleared_text, history

	# Assemble the Gradio chat UI: header, chat window, input row, event wiring.
	with gr.Blocks(fill_height=True, theme=gr.themes.Citrus()) as demo:
	    gr.Markdown("# Chat with Gemini 2.0 Flash and See its Thoughts 💭")

	    # Conversation view; uses the "messages" format with an avatar for the assistant only.
	    chatbot = gr.Chatbot(
	        label="Gemini2.0 'Thinking' Chatbot",
	        type="messages",
	        scale=1,
	        render_markdown=True,
	        avatar_images=(None, "https://lh3.googleusercontent.com/oxz0sUBF0iYoN4VvhqWTmux-cxfD1rxuYkuFEfm1SFaseXEsjjE4Je_C_V3UQPuJ87sImQK3HfQ3RXiaRnQetjaZbjJJUkiPL5jFJ1WRl5FKJZYibUA=w214-h214-n-nu"),
	    )

	    # Input row: message box and clear button share the row 4:1.
	    with gr.Row(equal_height=True):
	        input_box = gr.Textbox(
	            placeholder="Type your message here...",
	            label="Chat Message",
	            scale=4,
	            lines=1,
	        )

	        clear_button = gr.Button("Clear Chat", scale=1)

	    # Holds the submitted text so it survives the input box being cleared.
	    msg_store = gr.State("")

	    # Step 1: stash the submitted message and blank the input box.
	    stash_event = input_box.submit(
	        lambda msg: (msg, msg, ""),
	        inputs=[input_box],
	        outputs=[msg_store, input_box, input_box],
	        queue=False,
	    )

	    # Step 2: append the user's turn to the chat history.
	    history_event = stash_event.then(
	        user_message,
	        inputs=[msg_store, chatbot],
	        outputs=[input_box, chatbot],
	        queue=False,
	    )

	    # Step 3: stream the model's thoughts and answer into the chat.
	    history_event.then(
	        stream_gemini_response,
	        inputs=[msg_store, chatbot],
	        outputs=chatbot,
	    )

	    # Reset the conversation, the input box and the stored message.
	    clear_button.click(
	        lambda: ([], "", ""),
	        outputs=[chatbot, input_box, msg_store],
	        queue=False,
	    )

	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch(debug=True)