"""FastAPI service exposing a single endpoint that asks a vision-language
model whether the person on an uploaded ID image is male or female."""

import io

from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse
from PIL import Image
from transformers import pipeline

app = FastAPI()

# Initialize SmolVLM for image-to-text tasks. The model is loaded once at
# import time so every request reuses the same pipeline; device=-1 selects CPU.
# NOTE(review): SmolVLM-Instruct is a chat-style vision-language model; confirm
# that the installed transformers version supports prompted generation for it
# under the "image-to-text" task (newer releases expose "image-text-to-text").
pipe = pipeline("image-to-text", model="HuggingFaceTB/SmolVLM-Instruct", device=-1)


@app.post("/predict")
async def predict_gender(file: UploadFile = File(...)):
    """Answer whether the person on the uploaded ID image is male or female.

    Args:
        file: The uploaded image, sent as multipart form data.

    Returns:
        A JSON response of the form ``{"gender": <model answer>}`` where the
        answer is the model's generated text with surrounding whitespace
        stripped.

    Raises:
        HTTPException: 400 if the upload cannot be decoded as an image.
    """
    image_bytes = await file.read()

    try:
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    except OSError as err:
        # PIL raises UnidentifiedImageError (an OSError subclass) for empty or
        # non-image payloads; report it as a client error instead of a 500.
        raise HTTPException(
            status_code=400, detail="Uploaded file is not a valid image."
        ) from err

    # Prompt the model to identify gender
    prompt = "Is the person on this ID male or female?"

    # Inference is synchronous and slow; run it in a worker thread so the
    # event loop stays free to serve other requests. The prompt is passed by
    # keyword because the pipeline accepts only the image positionally.
    result = await run_in_threadpool(pipe, image, prompt=prompt)

    answer = result[0]["generated_text"].strip()
    return JSONResponse({"gender": answer})