Spaces:

breadlicker45
/

PaliGemma2

Running on Zero

breadlicker45 committed on Dec 15, 2024

Commit

dcd8e07

verified ·

1 Parent(s): dc3c0b2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -24,37 +24,40 @@ def load_model():
     # Load the processor and model using the correct identifier
     model_id = "google/paligemma2-28b-pt-448"
-    processor = PaliGemmaProcessor.from_pretrained(model_id, use_auth_token=token)
     device = "cuda" if torch.cuda.is_available() else "cpu"
     model = PaliGemmaForConditionalGeneration.from_pretrained(
-        model_id, torch_dtype=torch.bfloat16, use_auth_token=token
     ).to(device).eval()
     return processor, model
-@spaces.GPU  # Decorate the function that uses the GPU
 def process_image_and_text(image_pil, text_input):
     """Extract text from image using PaliGemma2."""
-    processor, model = load_model()
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    # Load the image using load_image
-    # We can pass the PIL image directly to load_image
-    image = load_image(image_pil)
-    # Use the provided text input
-    model_inputs = processor(text=text_input, images=image, return_tensors="pt").to(
-        device, dtype=torch.bfloat16
-    )
-    input_len = model_inputs["input_ids"].shape[-1]
-    with torch.inference_mode():
-        generation = model.generate(**model_inputs, max_new_tokens=100, do_sample=False)
-        generation = generation[0][input_len:]
-        decoded = processor.decode(generation, skip_special_tokens=True)
-    return decoded
 if __name__ == "__main__":

     # Load the processor and model using the correct identifier
     model_id = "google/paligemma2-28b-pt-448"
+    processor = PaliGemmaProcessor.from_pretrained(model_id, token=token)
     device = "cuda" if torch.cuda.is_available() else "cpu"
     model = PaliGemmaForConditionalGeneration.from_pretrained(
+        model_id, torch_dtype=torch.bfloat16, token=token
     ).to(device).eval()
     return processor, model
+@spaces.GPU(duration=120)  # Increased timeout to 120 seconds
 def process_image_and_text(image_pil, text_input):
     """Extract text from image using PaliGemma2."""
+    try:
+        processor, model = load_model()
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        # Load the image using load_image
+        image = load_image(image_pil)
+        # Use the provided text input
+        model_inputs = processor(text=text_input, images=image, return_tensors="pt").to(
+            device, dtype=torch.bfloat16
+        )
+        input_len = model_inputs["input_ids"].shape[-1]
+        with torch.inference_mode():
+            generation = model.generate(**model_inputs, max_new_tokens=100, do_sample=False)
+            generation = generation[0][input_len:]
+            decoded = processor.decode(generation, skip_special_tokens=True)
+        return decoded
+    except Exception as e:
+        print(f"Error during GPU task: {e}")
+        raise gr.Error(f"GPU task failed: {e}")
 if __name__ == "__main__":