"""Gradio demo: chat with Gemma 3 about an uploaded image."""

import os

# NOTE(review): `spaces` is kept ahead of torch/transformers, matching the
# original import order — confirm whether the hosting runtime depends on it.
import spaces
import gradio as gr
import torch
from transformers import pipeline

MODEL_ID = "google/gemma-3-4b-it"
SYSTEM_PROMPT = "You are a helpful assistant."
MAX_NEW_TOKENS = 200
MISSING_IMAGE_REPLY = "Please upload an image (required)"

# Fails fast with KeyError at import time when HF_TOKEN is not set.
hf_token = os.environ["HF_TOKEN"]

# Load the Gemma 3 pipeline.
# `token=` replaces the deprecated `use_auth_token=` keyword.
pipe = pipeline(
    "image-text-to-text",
    model=MODEL_ID,
    device="cuda",
    torch_dtype=torch.bfloat16,
    token=hf_token,
)


@spaces.GPU
def get_response(message, chat_history, image):
    """Answer one chat turn about ``image`` and append it to the history.

    Args:
        message: Text typed by the user; may be empty, in which case only
            the image is sent to the model.
        chat_history: List of ``(user_text, bot_text)`` pairs shown in the
            chatbot. ``None`` is treated as an empty history.
        image: The uploaded image (the UI below supplies a PIL image), or
            ``None`` when nothing was uploaded.

    Returns:
        A ``("", chat_history)`` tuple: the empty string clears the textbox
        and the list is the updated conversation.
    """
    chat_history = chat_history or []

    # An image is mandatory for every turn; answer with a hint otherwise.
    if image is None:
        chat_history.append((message, MISSING_IMAGE_REPLY))
        return "", chat_history

    user_content = [{"type": "image", "image": image}]
    # Add the text part only when the user actually typed something.
    if message:
        user_content.append({"type": "text", "text": message})

    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": SYSTEM_PROMPT}],
        },
        {"role": "user", "content": user_content},
    ]

    output = pipe(text=messages, max_new_tokens=MAX_NEW_TOKENS)

    # The reply is read from the last entry of the first result's
    # "generated_text"; an unexpected output shape is reported in the chat
    # instead of raising.
    try:
        reply = output[0]["generated_text"][-1]["content"]
    except (KeyError, IndexError, TypeError) as e:
        reply = f"Error processing the response: {e}"

    chat_history.append((message, reply))
    return "", chat_history


def clear_interface():
    """Return empty values for the textbox, the chatbot and the image."""
    return "", [], None


with gr.Blocks() as demo:
    gr.Markdown("# Gemma 3 Image Chat")
    gr.Markdown(
        "Chat with Gemma 3 about images. Image upload is required for each message."
    )

    chatbot = gr.Chatbot()

    with gr.Row():
        msg = gr.Textbox(
            show_label=False,
            placeholder="Type your message here about the image...",
            scale=4,
        )
        img = gr.Image(
            type="pil",
            label="Upload image (required)",
            scale=1,
        )

    submit_btn = gr.Button("Send")
    # Clear button to reset the interface
    clear_btn = gr.Button("Clear")

    # The Send button and pressing Enter in the textbox run the same handler.
    for register in (submit_btn.click, msg.submit):
        register(
            get_response,
            inputs=[msg, chatbot, img],
            outputs=[msg, chatbot],
        )

    clear_btn.click(
        clear_interface,
        inputs=None,
        outputs=[msg, chatbot, img],
    )

if __name__ == "__main__":
    demo.launch()