ReactLover committed · Commit 99cf18a · verified · Parent(s): 03767fe

Update app.py

Files changed (1)
  1. app.py +19 -8
app.py CHANGED
@@ -6,11 +6,11 @@ import io
 
 app = FastAPI()
 
-# Initialize multimodal pipeline with Gemma 3n E2B (instruct)
+# Initialize the Gemma 3n E2B pipeline
 pipe = pipeline(
-    task="image-text-to-text",
+    "image-text-to-text",
     model="google/gemma-3n-e2b-it",
-    device=-1
+    device=-1  # CPU inference
 )
 
 @app.post("/predict")
@@ -18,11 +18,22 @@ async def predict_gender(file: UploadFile = File(...)):
     image_bytes = await file.read()
     image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
 
-    prompt = [
-        {"type": "image", "data": image},
-        {"type": "text", "text": "Is the person on this ID male or female?"}
+    messages = [
+        {
+            "role": "system",
+            "content": [{"type": "text", "text": "You are a helpful assistant."}]
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "image": image},
+                {"type": "text", "text": "Is the person on this ID male or female?"}
+            ]
+        }
     ]
 
-    result = pipe(prompt)
-    answer = result[0]["generated_text"].strip()
+    result = pipe(text=messages, max_new_tokens=32)
+    # Each generated message has list of generated segments; take the last
+    answer = result[0]["generated_text"][-1]["content"].strip()
+
     return JSONResponse({"gender": answer})
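
For reference, a minimal client sketch for calling the updated /predict endpoint. It assumes the app is served locally (for example with uvicorn app:app --port 8000); the host, port, and sample file name are placeholders, not part of this commit.

import requests

# Hypothetical sample image; any JPEG/PNG works the same way.
with open("id_card.jpg", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/predict",
        files={"file": ("id_card.jpg", f, "image/jpeg")},  # field name must be "file"
    )

print(resp.json())  # e.g. {"gender": "..."}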