Florence-2-base-Castollux-v0.6

Running

App Files Community

xzuyn committed on Feb 7

Commit

b329cda

verified ·

1 Parent(s): 240d96f

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -13

app.py CHANGED Viewed

@@ -20,10 +20,11 @@ model.to(device)
 TITLE = f"# [{model_name}](https://huggingface.co/{model_name})"
-def process_image(image):
     """
     Process a single image to generate a caption.
     Supports image input as file path, numpy array, or PIL Image.
     """
     try:
         # Convert input to PIL image if necessary
@@ -45,8 +46,10 @@ def process_image(image):
                 input_ids=inputs["input_ids"],
                 pixel_values=inputs["pixel_values"],
                 max_new_tokens=1024,
-                num_beams=5,
                 do_sample=True,
             )
         # Decode and post-process the generated text
@@ -71,28 +74,43 @@ with gr.Blocks(css=css) as demo:
         with gr.Row():
             with gr.Column():
                 input_img = gr.Image(label="Input Picture")
-                submit_btn = gr.Button(value="Submit")
             with gr.Column():
                 output_text = gr.Textbox(label="Output Text")
         gr.Examples(
             [
-                ["eval_img_1.jpg"],
-                ["eval_img_2.jpg"],
-                ["eval_img_3.jpg"],
-                ["eval_img_4.jpg"],
-                ["eval_img_5.jpg"],
-                ["eval_img_6.jpg"],
-                ["eval_img_7.png"],
-                ["eval_img_8.jpg"],
             ],
-            inputs=[input_img],
             outputs=[output_text],
             fn=process_image,
             label="Try captioning on below examples",
         )
-        submit_btn.click(process_image, [input_img], [output_text])
 if __name__ == "__main__":
     demo.launch(debug=True)

 TITLE = f"# [{model_name}](https://huggingface.co/{model_name})"
+def process_image(image, num_beams=5, min_p=0.0, top_p=1.0):
     """
     Process a single image to generate a caption.
     Supports image input as file path, numpy array, or PIL Image.
+    Generation settings (num_beams, min_p, top_p) can be customized.
     """
     try:
         # Convert input to PIL image if necessary
                 input_ids=inputs["input_ids"],
                 pixel_values=inputs["pixel_values"],
                 max_new_tokens=1024,
+                num_beams=num_beams,
                 do_sample=True,
+                top_p=top_p,
+                min_p=min_p,
             )
         # Decode and post-process the generated text
         with gr.Row():
             with gr.Column():
                 input_img = gr.Image(label="Input Picture")
             with gr.Column():
                 output_text = gr.Textbox(label="Output Text")
+                submit_btn = gr.Button(value="Submit")
+                num_beams_slider = gr.Slider(
+                    minimum=1, maximum=5, step=1, value=5, label="Number of Beams"
+                )
+                min_p_slider = gr.Slider(
+                    minimum=0, maximum=1, step=0.01, value=0.0, label="Min-P"
+                )
+                top_p_slider = gr.Slider(
+                    minimum=0, maximum=1, step=0.01, value=1.0, label="Top-P"
+                )
         gr.Examples(
             [
+                ["eval_img_1.jpg", 5, 0.0, 1.0],
+                ["eval_img_2.jpg", 5, 0.0, 1.0],
+                ["eval_img_3.jpg", 5, 0.0, 1.0],
+                ["eval_img_4.jpg", 5, 0.0, 1.0],
+                ["eval_img_5.jpg", 5, 0.0, 1.0],
+                ["eval_img_6.jpg", 5, 0.0, 1.0],
+                ["eval_img_7.png", 5, 0.0, 1.0],
+                ["eval_img_8.jpg", 5, 0.0, 1.0],
             ],
+            inputs=[input_img, num_beams_slider, min_p_slider, top_p_slider],
             outputs=[output_text],
             fn=process_image,
             label="Try captioning on below examples",
         )
+        submit_btn.click(
+            process_image,
+            [input_img, num_beams_slider, min_p_slider, top_p_slider],
+            [output_text]
+        )
 if __name__ == "__main__":
     demo.launch(debug=True)