Spaces:

Geraldine
/

Image-to-text-SmolVLM-for-Omeka

Running

Geraldine committed on Jan 25

Commit

67f296e

verified ·

1 Parent(s): c8ce5f0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -13,8 +13,9 @@ import os, stat, io
 # Load the model in half-precision on the available device(s)
 model = AutoModelForVision2Seq.from_pretrained(
     "./SmolVLM-500M-Instruct",
-    torch_dtype=torch.bfloat16,
-    _attn_implementation="eager"
 )
 processor = AutoProcessor.from_pretrained("./SmolVLM-500M-Instruct")
@@ -51,7 +52,13 @@ def describe_image(image_array):
     inputs = processor(text=prompt, images=[image], return_tensors="pt")
     # Inference: Generation of the output
-    generated_ids = model.generate(**inputs, max_new_tokens=500)
     output_ids = [
         generated_ids[len(input_ids) :]
         for input_ids, generated_ids in zip(inputs.input_ids, generated_ids)

 # Load the model in full precision (float32) on the CPU
 model = AutoModelForVision2Seq.from_pretrained(
     "./SmolVLM-500M-Instruct",
+    torch_dtype=torch.float32,
+    #_attn_implementation="eager",
+    device_map="cpu"
 )
 processor = AutoProcessor.from_pretrained("./SmolVLM-500M-Instruct")
     inputs = processor(text=prompt, images=[image], return_tensors="pt")
     # Inference: Generation of the output
+    generated_ids = model.generate(
+        **inputs,
+        max_new_tokens=500,
+        num_beams=1,  # Disable beam search
+        do_sample=False,  # Disable sampling
+        temperature=1.0  # Neutral default; has no effect while do_sample=False (greedy decoding)
+    )
     output_ids = [
         generated_ids[len(input_ids) :]
         for input_ids, generated_ids in zip(inputs.input_ids, generated_ids)