File size: 1,691 Bytes
03767fe
91cb1e2
aae7362
03767fe
 
aae7362
91cb1e2
 
 
 
 
 
03767fe
 
 
aae7362
 
 
 
 
03767fe
91cb1e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03767fe
 
 
 
aae7362
5bf4ffc
aae7362
 
 
 
03767fe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse, HTMLResponse
from transformers import AutoProcessor, AutoModelForVision2Seq
from PIL import Image
import io
import torch
import os

# Make sure cache is writable
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
os.environ["HF_HUB_CACHE"] = "/app/cache/hub"

app = FastAPI()

# Load model and processor
processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-256M-Instruct")
model = AutoModelForVision2Seq.from_pretrained("HuggingFaceTB/SmolVLM-256M-Instruct")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

@app.get("/")
def home():
    return {"message": "API is running. Use POST /predict with an image, or visit /upload to test in browser."}

@app.get("/upload", response_class=HTMLResponse)
def upload_form():
    return """
    <html>
      <body>
        <h2>Upload an ID Image</h2>
        <form action="/predict" enctype="multipart/form-data" method="post">
          <input name="file" type="file">
          <input type="submit" value="Upload">
        </form>
      </body>
    </html>
    """

@app.post("/predict")
async def predict_gender(file: UploadFile = File(...)):
    image_bytes = await file.read()
    image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

    prompt = "Is the person on this ID male or female?"
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=32)
    answer = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()

    return JSONResponse({"gender": answer})