ReactLover committed · Commit 99cf18a · verified · Parent(s): 03767fe

Update app.py

Files changed (1)
  1. app.py +19 -8
app.py CHANGED
@@ -6,11 +6,11 @@ import io
 
 app = FastAPI()
 
-# Initialize multimodal pipeline with Gemma 3n E2B (instruct)
+# Initialize the Gemma 3n E2B pipeline
 pipe = pipeline(
-    task="image-text-to-text",
+    "image-text-to-text",
     model="google/gemma-3n-e2b-it",
-    device=-1
+    device=-1  # CPU inference
 )
 
 @app.post("/predict")
@@ -18,11 +18,22 @@ async def predict_gender(file: UploadFile = File(...)):
     image_bytes = await file.read()
     image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
 
-    prompt = [
-        {"type": "image", "data": image},
-        {"type": "text", "text": "Is the person on this ID male or female?"}
+    messages = [
+        {
+            "role": "system",
+            "content": [{"type": "text", "text": "You are a helpful assistant."}]
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "image": image},
+                {"type": "text", "text": "Is the person on this ID male or female?"}
+            ]
+        }
     ]
 
-    result = pipe(prompt)
-    answer = result[0]["generated_text"].strip()
+    result = pipe(text=messages, max_new_tokens=32)
+    # Each generated message has list of generated segments; take the last
+    answer = result[0]["generated_text"][-1]["content"].strip()
+
     return JSONResponse({"gender": answer})
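
For reference, a minimal client sketch for calling the updated /predict endpoint. It assumes the app is served locally (for example with uvicorn app:app --port 8000); the host, port, and sample file name are placeholders, not part of this commit.

import requests

# Hypothetical sample image; any JPEG/PNG works the same way.
with open("id_card.jpg", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/predict",
        files={"file": ("id_card.jpg", f, "image/jpeg")},  # field name must be "file"
    )

print(resp.json())  # e.g. {"gender": "..."}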