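# Gradio Space: ask Gemma 3 questions about an uploaded image.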
import gradio as gr
from transformers import pipeline
import torch
import os
import spaces
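# Gemma 3 is a gated model, so an HF_TOKEN secret must be configured on the Space.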
hf_token = os.environ["HF_TOKEN"]
# Load the Gemma 3 pipeline.
pipe = pipeline(
    "image-text-to-text",
    model="google/gemma-3-4b-it",
    device="cuda",
    torch_dtype=torch.bfloat16,
    token=hf_token,  # `use_auth_token` is deprecated in recent transformers releases
)
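# On ZeroGPU Spaces, @spaces.GPU attaches a GPU to the worker only while the
# decorated function is running.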
@spaces.GPU
def generate_response(user_text, user_image):
    # An image is required for the image-text-to-text task.
    if user_image is None:
        return "Error: An image upload is mandatory."
    # Build the chat messages: a system prompt, then the user's image and optional text.
    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": "You are a helpful assistant."}],
        }
    ]
    user_content = [{"type": "image", "image": user_image}]
    if user_text:
        user_content.append({"type": "text", "text": user_text})
    messages.append({"role": "user", "content": user_content})
    # Run the pipeline on the chat messages.
    output = pipe(text=messages, max_new_tokens=200)
    # The pipeline echoes the whole conversation; the assistant's reply is the
    # last message. Fall back to the raw output if the structure is unexpected.
    try:
        response = output[0]["generated_text"][-1]["content"]
    except (KeyError, IndexError, TypeError):
        response = str(output)
    return response
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Message", placeholder="Type your message here..."),
        # Gradio 4 replaced the `source` argument with `sources` (a list).
        gr.Image(type="pil", label="Upload an Image", sources=["upload"]),
    ],
    outputs=gr.Textbox(label="Response"),
    title="Gemma 3 Simple Interface",
    description="Enter your message and upload an image (image upload is mandatory) to get a response.",
)
if __name__ == "__main__":
    iface.launch()