import io

from fastapi import FastAPI, File, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse
from PIL import Image
from transformers import pipeline

# ASGI application object; the route decorators in this module register on it.
app = FastAPI()

# Build the SmolVLM-Instruct pipeline once at import time so every request
# reuses the same loaded model. device=-1 keeps inference on the CPU.
# NOTE(review): confirm the "image-to-text" task accepts a text prompt for
# this checkpoint in the installed transformers version.
pipe = pipeline(
    task="image-to-text",
    model="HuggingFaceTB/SmolVLM-Instruct",
    device=-1,
)

@app.post("/predict")
async def predict_gender(file: UploadFile = File(...)):
    """Answer a fixed male/female question about an uploaded ID image.

    The upload is decoded with Pillow, converted to RGB, and passed to the
    module-level ``pipe`` together with a fixed question.

    Args:
        file: The uploaded image, sent as multipart form data.

    Returns:
        ``{"gender": <generated text>}`` on success, or a 400 response of
        the form ``{"detail": <message>}`` when the upload is empty or
        cannot be decoded as an image.
    """
    image_bytes = await file.read()
    if not image_bytes:
        return JSONResponse(
            status_code=400,
            content={"detail": "Uploaded file is empty."},
        )

    try:
        # convert() returns an independent image, so the source handle can
        # be closed as soon as the RGB copy exists.
        with Image.open(io.BytesIO(image_bytes)) as uploaded:
            image = uploaded.convert("RGB")
    except OSError:
        # Pillow's UnidentifiedImageError is an OSError subclass; truncated
        # or corrupt files also surface as OSError while decoding.
        return JSONResponse(
            status_code=400,
            content={"detail": "Uploaded file is not a readable image."},
        )

    # Prompt the model to identify gender
    prompt = "Is the person on this ID male or female?"

    # The pipeline call is blocking, CPU-bound work: run it on a worker
    # thread so the event loop keeps serving other requests. The prompt must
    # be passed by keyword; the pipeline takes only the image positionally.
    # NOTE(review): confirm this model/task pair accepts a prompt in the
    # installed transformers version.
    result = await run_in_threadpool(pipe, image, prompt=prompt)
    answer = result[0]["generated_text"].strip()

    return JSONResponse({"gender": answer})