Spaces:

breadlicker45
/

PaliGemma2

Sleeping

breadlicker45 committed on Dec 15, 2024

Commit

a1117d5

verified ·

1 Parent(s): dfd6583

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -43,11 +43,8 @@ def process_image_and_text(image_pil, text_input, num_beams, temperature, seed):
         # Load the image using load_image
         image = load_image(image_pil)
-        # Add <image> token to the beginning of the text prompt
-        text_input = text_input
         # Use the provided text input
-        model_inputs = processor(text=text_input, images=image, return_tensors="pt").to(
             device, dtype=torch.bfloat16
         )
         input_len = model_inputs["input_ids"].shape[-1]
@@ -70,10 +67,9 @@ if __name__ == "__main__":
         fn=process_image_and_text,
         inputs=[
             gr.Image(type="pil", label="Upload an image"),
-            gr.Textbox(label="Enter Text Prompt"),
             gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Number of Beams"),
             gr.Slider(minimum=0.1, maximum=2.0, step=0.1, value=1.0, label="Temperature"),
-            gr.Number(label="Random Seed", value=42, precision=0),
         ],
         outputs=gr.Textbox(label="Generated Text"),
         title="PaliGemma2 Image and Text to Text",

         # Load the image using load_image
         image = load_image(image_pil)
         # Use the provided text input
+        model_inputs = processor(images=image, return_tensors="pt").to(
             device, dtype=torch.bfloat16
         )
         input_len = model_inputs["input_ids"].shape[-1]
         fn=process_image_and_text,
         inputs=[
             gr.Image(type="pil", label="Upload an image"),
             gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Number of Beams"),
             gr.Slider(minimum=0.1, maximum=2.0, step=0.1, value=1.0, label="Temperature"),
+            gr.Number(label="Random Seed", value=0, precision=0),
         ],
         outputs=gr.Textbox(label="Generated Text"),
         title="PaliGemma2 Image and Text to Text",