Spaces:

ReactLover
/

Gemma3n

Sleeping

File size: 1,131 Bytes

from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse
from transformers import pipeline
from PIL import Image
import io

# ASGI application object; the /predict route below registers itself on it.
app = FastAPI()

# Build the Gemma 3n E2B instruction-tuned pipeline once, at import time, so
# the request handler reuses a single shared pipeline object.
pipe = pipeline(
    task="image-text-to-text",
    model="google/gemma-3n-e2b-it",
    device=-1,  # -1 selects CPU inference
)

@app.post("/predict")
async def predict_gender(file: UploadFile = File(...)):
    """Ask the Gemma pipeline whether the person on an uploaded ID is male or female.

    Args:
        file: Multipart upload expected to contain the ID image.

    Returns:
        ``{"gender": <model answer>}`` on success, or an HTTP 400 response
        ``{"detail": ...}`` when the upload cannot be decoded as an image.
    """
    image_bytes = await file.read()

    # The upload is untrusted: empty, truncated or non-image payloads make
    # Pillow raise (UnidentifiedImageError is an OSError subclass). Report
    # those as a client error instead of letting them surface as a 500.
    try:
        with Image.open(io.BytesIO(image_bytes)) as uploaded:
            # convert() forces the decode and returns an independent image,
            # so the result stays usable after the source image is closed.
            image = uploaded.convert("RGB")
    except (OSError, Image.DecompressionBombError):
        return JSONResponse(
            {"detail": "Uploaded file is not a valid image."},
            status_code=400,
        )

    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": "You are a helpful assistant."}],
        },
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": "Is the person on this ID male or female?"},
            ],
        },
    ]

    # NOTE(review): this is a synchronous call inside an async handler, so the
    # event loop is blocked for the duration of inference. Moving it to a
    # worker thread would allow concurrent use of the shared pipeline —
    # confirm that is safe before changing it.
    result = pipe(text=messages, max_new_tokens=32)

    # Take the last entry of the first result's "generated_text" and use its
    # "content" — presumably the assistant's reply appended to the chat;
    # verify against the pipeline's output format.
    answer = result[0]["generated_text"][-1]["content"].strip()

    return JSONResponse({"gender": answer})