Spaces:

SkalskiP
/

YOLO-World

Runtime error

App Files Community

SkalskiP committed on Feb 16, 2024

Commit

3c9b48f

1 Parent(s): 30841fb

Advanced configuration added

Browse files

Files changed (1) hide show

app.py +105 -17

app.py CHANGED Viewed

@@ -1,20 +1,23 @@
 from typing import List
 import cv2
-import torch
 import gradio as gr
 import numpy as np
 import supervision as sv
 from inference.models import YOLOWorld
 from utils.efficient_sam import load, inference_with_box
 MARKDOWN = """
-# YOLO-World 🔥 [with Efficient-SAM]
-This is a demo of zero-shot instance segmentation using [YOLO-World](https://github.com/AILab-CVC/YOLO-World) and [Efficient-SAM](https://github.com/yformer/EfficientSAM).
-Powered by Roboflow [Inference](https://github.com/roboflow/inference) and [Supervision](https://github.com/roboflow/supervision).
 """
 EXAMPLES = [
@@ -35,19 +38,21 @@ def process_categories(categories: str) -> List[str]:
 def process_image(
-        input_image: np.ndarray,
-        categories: str,
-        confidence_threshold: float = 0.005,
-        iou_threshold: float = 0.5,
-        with_segmentation: bool = True,
-        with_confidence: bool = False,
-        with_class_agnostic_nms: bool = False,
 ) -> np.ndarray:
     categories = process_categories(categories)
     YOLO_WORLD_MODEL.set_classes(categories)
     results = YOLO_WORLD_MODEL.infer(input_image, confidence=confidence_threshold)
     detections = sv.Detections.from_inference(results)
-    detections = detections.with_nms(class_agnostic=with_class_agnostic_nms, threshold=iou_threshold)
     if with_segmentation:
         masks = []
         for [x_min, y_min, x_max, y_max] in detections.xyxy:
@@ -57,7 +62,11 @@ def process_image(
         detections.mask = np.array(masks)
     labels = [
-        f"{categories[class_id]}: {confidence:.2f}" if with_confidence else f"{categories[class_id]}"
         for class_id, confidence in
         zip(detections.class_id, detections.confidence)
     ]
@@ -70,8 +79,67 @@ def process_image(
     return output_image
 with gr.Blocks() as demo:
     gr.Markdown(MARKDOWN)
     with gr.Row():
         input_image_component = gr.Image(
             type='numpy',
@@ -85,19 +153,39 @@ with gr.Blocks() as demo:
         categories_text_component = gr.Textbox(
             label='Categories',
             placeholder='comma separated list of categories',
-            scale=5
         )
-        submit_button_component = gr.Button('Submit', scale=1)
     gr.Examples(
         fn=process_image,
         examples=EXAMPLES,
-        inputs=[input_image_component, categories_text_component],
         outputs=output_image_component
     )
     submit_button_component.click(
         fn=process_image,
-        inputs=[input_image_component, categories_text_component],
         outputs=output_image_component
     )

 from typing import List
 import cv2
 import gradio as gr
 import numpy as np
 import supervision as sv
+import torch
 from inference.models import YOLOWorld
 from utils.efficient_sam import load, inference_with_box
 MARKDOWN = """
+# YOLO-World + EfficientSAM 🔥
+This is a demo of zero-shot instance segmentation using
+[YOLO-World](https://github.com/AILab-CVC/YOLO-World) and
+[EfficientSAM](https://github.com/yformer/EfficientSAM).
+Powered by Roboflow [Inference](https://github.com/roboflow/inference) and
+[Supervision](https://github.com/roboflow/supervision).
 """
 EXAMPLES = [
 def process_image(
+    input_image: np.ndarray,
+    categories: str,
+    confidence_threshold: float = 0.3,
+    iou_threshold: float = 0.5,
+    with_segmentation: bool = True,
+    with_confidence: bool = False,
+    with_class_agnostic_nms: bool = False,
 ) -> np.ndarray:
     categories = process_categories(categories)
     YOLO_WORLD_MODEL.set_classes(categories)
     results = YOLO_WORLD_MODEL.infer(input_image, confidence=confidence_threshold)
     detections = sv.Detections.from_inference(results)
+    detections = detections.with_nms(
+        class_agnostic=with_class_agnostic_nms,
+        threshold=iou_threshold)
     if with_segmentation:
         masks = []
         for [x_min, y_min, x_max, y_max] in detections.xyxy:
         detections.mask = np.array(masks)
     labels = [
+        (
+            f"{categories[class_id]}: {confidence:.2f}"
+            if with_confidence
+            else f"{categories[class_id]}"
+        )
         for class_id, confidence in
         zip(detections.class_id, detections.confidence)
     ]
     return output_image
+confidence_threshold_component = gr.Slider(
+    minimum=0,
+    maximum=1.0,
+    value=0.3,
+    step=0.01,
+    label="Confidence Threshold",
+    info=(
+        "The confidence threshold for the YOLO-World model. Lower the threshold to "
+        "reduce false negatives, enhancing the model's sensitivity to detect "
+        "sought-after objects. Conversely, increase the threshold to minimize false "
+        "positives, preventing the model from identifying objects it shouldn't."
+    ))
+iou_threshold_component = gr.Slider(
+    minimum=0,
+    maximum=1.0,
+    value=0.5,
+    step=0.01,
+    label="IoU Threshold",
+    info=(
+        "The Intersection over Union (IoU) threshold for non-maximum suppression. "
+        "Decrease the value to lessen the occurrence of overlapping bounding boxes, "
+        "making the detection process stricter. On the other hand, increase the value "
+        "to allow more overlapping bounding boxes, accommodating a broader range of "
+        "detections."
+    ))
+with_segmentation_component = gr.Checkbox(
+    value=True,
+    label="With Segmentation",
+    info=(
+        "Whether to run EfficientSAM for instance segmentation."
+    )
+)
+with_confidence_component = gr.Checkbox(
+    value=False,
+    label="Display Confidence",
+    info=(
+        "Whether to display the confidence of the detected objects."
+    )
+)
+with_class_agnostic_nms_component = gr.Checkbox(
+    value=False,
+    label="Use Class-Agnostic NMS",
+    info=(
+        "Suppress overlapping bounding boxes across all classes."
+    )
+)
 with gr.Blocks() as demo:
     gr.Markdown(MARKDOWN)
+    with gr.Accordion("Configuration", open=False):
+        confidence_threshold_component.render()
+        iou_threshold_component.render()
+        with gr.Row():
+            with_segmentation_component.render()
+            with_confidence_component.render()
+            with_class_agnostic_nms_component.render()
     with gr.Row():
         input_image_component = gr.Image(
             type='numpy',
         categories_text_component = gr.Textbox(
             label='Categories',
             placeholder='comma separated list of categories',
+            scale=7
+        )
+        submit_button_component = gr.Button(
+            value='Submit',
+            scale=1,
+            variant='primary'
         )
     gr.Examples(
         fn=process_image,
         examples=EXAMPLES,
+        inputs=[
+            input_image_component,
+            categories_text_component,
+            confidence_threshold_component,
+            iou_threshold_component,
+            with_segmentation_component,
+            with_confidence_component,
+            with_class_agnostic_nms_component
+        ],
         outputs=output_image_component
     )
     submit_button_component.click(
         fn=process_image,
+        inputs=[
+            input_image_component,
+            categories_text_component,
+            confidence_threshold_component,
+            iou_threshold_component,
+            with_segmentation_component,
+            with_confidence_component,
+            with_class_agnostic_nms_component
+        ],
         outputs=output_image_component
     )