Spaces:

fcakyon
/

sahi-yolo11

Running on Zero

App Files Community

fcakyon

atalaydenknalbant committed on Jul 22

Commit

895de79

verified ·

1 Parent(s): c37fa9e

Enhanced YOLOv11 SAHI Demo with Dynamic Model Loading, UI Controls and MCP Compatibility (#5)

Browse files

- Enhanced YOLOv11 SAHI Demo with Dynamic Model Loading, UI Controls and MCP Compatibility (ce331e8383d49b9480287420f1341dc69a57f1dd)

Co-authored-by: Atalay Denknalbant <[email protected]>

Files changed (1) hide show

app.py +178 -55

app.py CHANGED Viewed

@@ -6,10 +6,9 @@ import sahi.slicing
 from PIL import Image
 import numpy
 from ultralytics import YOLO
 import sys
 import types
 if 'huggingface_hub.utils._errors' not in sys.modules:
     mock_errors = types.ModuleType('_errors')
     mock_errors.RepositoryNotFoundError = Exception
@@ -37,15 +36,33 @@ sahi.utils.file.download_from_url(
     "highway3.jpg",
 )
-# Model
-model = AutoDetectionModel.from_pretrained(
-    model_type="ultralytics", model_path="yolo11s.pt", device="cpu", confidence_threshold=0.5, image_size=IMAGE_SIZE
-)
 def sahi_yolo_inference(
     image,
     slice_height=512,
     slice_width=512,
     overlap_height_ratio=0.2,
@@ -55,6 +72,29 @@ def sahi_yolo_inference(
     postprocess_match_threshold=0.5,
     postprocess_class_agnostic=False,
 ):
     image_width, image_height = image.size
     sliced_bboxes = sahi.slicing.get_slice_bboxes(
@@ -71,18 +111,24 @@ def sahi_yolo_inference(
             f"{len(sliced_bboxes)} slices are too much for huggingface spaces, try smaller slice size."
         )
-    # standard inference
     prediction_result_1 = sahi.predict.get_prediction(
-        image=image, detection_model=model
     )
-    print(image)
     visual_result_1 = sahi.utils.cv.visualize_object_predictions(
         image=numpy.array(image),
         object_prediction_list=prediction_result_1.object_prediction_list,
     )
     output_1 = Image.fromarray(visual_result_1["image"])
-    # sliced inference
     prediction_result_2 = sahi.predict.get_sliced_prediction(
         image=image,
         detection_model=model,
@@ -95,6 +141,13 @@ def sahi_yolo_inference(
         postprocess_match_threshold=postprocess_match_threshold,
         postprocess_class_agnostic=postprocess_class_agnostic,
     )
     visual_result_2 = sahi.utils.cv.visualize_object_predictions(
         image=numpy.array(image),
         object_prediction_list=prediction_result_2.object_prediction_list,
@@ -105,48 +158,118 @@ def sahi_yolo_inference(
     return output_1, output_2
-inputs = [
-    gr.Image(type="pil", label="Original Image"),
-    gr.Number(value=512, label="slice_height"),
-    gr.Number(value=512, label="slice_width"),
-    gr.Number(value=0.2, label="overlap_height_ratio"),
-    gr.Number(value=0.2, label="overlap_width_ratio"),
-    gr.Dropdown(
-        ["NMS", "GREEDYNMM"],
-        type="value",
-        value="NMS",
-        label="postprocess_type",
-    ),
-    gr.Dropdown(
-        ["IOU", "IOS"], type="value", value="IOU", label="postprocess_type"
-    ),
-    gr.Number(value=0.5, label="postprocess_match_threshold"),
-    gr.Checkbox(value=True, label="postprocess_class_agnostic"),
-]
-outputs = [
-    gr.Image(type="pil", label="YOLO11s Standard"),
-    gr.Image(type="pil", label="YOLO11s + SAHI Sliced"),
-]
-title = "Small Object Detection with SAHI + YOLO11"
-description = "SAHI + YOLO11 demo for small object detection. Upload your own image or click an example image to use."
-article = "<p style='text-align: center'>SAHI is a lightweight vision library for performing large scale object detection/ instance segmentation.. <a href='https://github.com/obss/sahi'>SAHI Github</a> | <a href='https://medium.com/codable/sahi-a-vision-library-for-performing-sliced-inference-on-large-images-small-objects-c8b086af3b80'>SAHI Blog</a> </p>"
-examples = [
-    ["apple_tree.jpg", 256, 256, 0.2, 0.2, "NMS", "IOU", 0.4, True],
-    ["highway.jpg", 256, 256, 0.2, 0.2, "NMS", "IOU", 0.4, True],
-    ["highway2.jpg", 512, 512, 0.2, 0.2, "NMS", "IOU", 0.4, True],
-    ["highway3.jpg", 512, 512, 0.2, 0.2, "NMS", "IOU", 0.4, True],
-]
-gr.Interface(
-    sahi_yolo_inference,
-    inputs,
-    outputs,
-    title=title,
-    description=description,
-    article=article,
-    examples=examples,
-    theme="huggingface",
-    cache_examples=True,
-).launch(debug=True)

 from PIL import Image
 import numpy
 from ultralytics import YOLO
 import sys
 import types
 if 'huggingface_hub.utils._errors' not in sys.modules:
     mock_errors = types.ModuleType('_errors')
     mock_errors.RepositoryNotFoundError = Exception
     "highway3.jpg",
 )
+# Global model variable
+# Starts as None; load_yolo_model() rebinds it to the model it builds.
+model = None
+def load_yolo_model(model_name, confidence_threshold=0.5):
+    """
+    Loads a YOLOv11 detection model.
+    The model is built through SAHI's AutoDetectionModel with the "ultralytics"
+    model type, always on the CPU device, using the IMAGE_SIZE constant defined
+    elsewhere in this file. As a side effect the module-level ``model`` global
+    is rebound to the new instance. A new model is constructed on every call;
+    no reuse of a previously loaded model is attempted here.
+    Args:
+        model_name (str): The name of the YOLOv11 model to load (e.g., "yolo11n.pt").
+            It is passed unchanged as ``model_path``.
+        confidence_threshold (float): The confidence threshold for object detection.
+    Returns:
+        AutoDetectionModel: The loaded SAHI AutoDetectionModel.
+    """
+    global model
+    # The model name doubles as the path handed to from_pretrained.
+    model_path = model_name
+    model = AutoDetectionModel.from_pretrained(
+        model_type="ultralytics", model_path=model_path, device="cpu",
+        confidence_threshold=confidence_threshold, image_size=IMAGE_SIZE
+    )
+    return model
 def sahi_yolo_inference(
     image,
+    yolo_model_name,
+    confidence_threshold,
+    max_detections,
     slice_height=512,
     slice_width=512,
     overlap_height_ratio=0.2,
     postprocess_match_threshold=0.5,
     postprocess_class_agnostic=False,
 ):
+    """
+    Performs object detection using SAHI with a specified YOLOv11 model.
+    Args:
+        image (PIL.Image.Image): The input image for detection.
+        yolo_model_name (str): The name of the YOLOv11 model to use for inference.
+        confidence_threshold (float): The confidence threshold for object detection.
+        max_detections (int): The maximum number of detections to return.
+        slice_height (int): The height of each slice for sliced inference.
+        slice_width (int): The width of each slice for sliced inference.
+        overlap_height_ratio (float): The overlap ratio for slice height.
+        overlap_width_ratio (float): The overlap ratio for slice width.
+        postprocess_type (str): The type of postprocessing to apply ("NMS" or "GREEDYNMM").
+        postprocess_match_metric (str): The metric for postprocessing matching ("IOU" or "IOS").
+        postprocess_match_threshold (float): The threshold for postprocessing matching.
+        postprocess_class_agnostic (bool): Whether postprocessing should be class agnostic.
+    Returns:
+        tuple: A tuple containing two PIL.Image.Image objects:
+               - The image with standard YOLO inference results.
+               - The image with SAHI sliced YOLO inference results.
+    """
+    load_yolo_model(yolo_model_name, confidence_threshold)
     image_width, image_height = image.size
     sliced_bboxes = sahi.slicing.get_slice_bboxes(
             f"{len(sliced_bboxes)} slices are too much for huggingface spaces, try smaller slice size."
         )
+    # Standard inference
     prediction_result_1 = sahi.predict.get_prediction(
+        image=image, detection_model=model,
     )
+    # Filter by max_detections for standard inference
+    if max_detections is not None and len(prediction_result_1.object_prediction_list) > max_detections:
+        prediction_result_1.object_prediction_list = sorted(
+            prediction_result_1.object_prediction_list, key=lambda x: x.score.value, reverse=True
+        )[:max_detections]
     visual_result_1 = sahi.utils.cv.visualize_object_predictions(
         image=numpy.array(image),
         object_prediction_list=prediction_result_1.object_prediction_list,
     )
     output_1 = Image.fromarray(visual_result_1["image"])
+    # Sliced inference
     prediction_result_2 = sahi.predict.get_sliced_prediction(
         image=image,
         detection_model=model,
         postprocess_match_threshold=postprocess_match_threshold,
         postprocess_class_agnostic=postprocess_class_agnostic,
     )
+    # Filter by max_detections for sliced inference
+    if max_detections is not None and len(prediction_result_2.object_prediction_list) > max_detections:
+        prediction_result_2.object_prediction_list = sorted(
+            prediction_result_2.object_prediction_list, key=lambda x: x.score.value, reverse=True
+        )[:max_detections]
     visual_result_2 = sahi.utils.cv.visualize_object_predictions(
         image=numpy.array(image),
         object_prediction_list=prediction_result_2.object_prediction_list,
     return output_1, output_2
+# Gradio UI: input controls in the left column, the two result images in the
+# right column, then example rows and the button wiring. Both gr.Examples and
+# submit_button.click pass the same twelve components, in the same order, to
+# sahi_yolo_inference.
+with gr.Blocks() as app:
+    gr.Markdown("# Small Object Detection with SAHI + YOLOv11")
+    gr.Markdown(
+        "SAHI + YOLOv11 demo for small object detection. "
+        "Upload your own image or click an example image to use."
+    )
+    with gr.Row():
+        with gr.Column():
+            original_image_input = gr.Image(type="pil", label="Original Image")
+            yolo_model_dropdown = gr.Dropdown(
+                choices=["yolo11n.pt", "yolo11s.pt", "yolo11m.pt", "yolo11l.pt", "yolo11x.pt"],
+                value="yolo11s.pt",
+                label="YOLOv11 Model",
+            )
+            confidence_threshold_slider = gr.Slider(
+                minimum=0.0,
+                maximum=1.0,
+                step=0.01,
+                value=0.5,
+                label="Confidence Threshold",
+            )
+            max_detections_slider = gr.Slider(
+                minimum=1,
+                maximum=500,
+                step=1,
+                value=300,
+                label="Max Detections",
+            )
+            slice_height_input = gr.Number(value=512, label="Slice Height")
+            slice_width_input = gr.Number(value=512, label="Slice Width")
+            overlap_height_ratio_slider = gr.Slider(
+                minimum=0.0,
+                maximum=1.0,
+                step=0.01,
+                value=0.2,
+                label="Overlap Height Ratio",
+            )
+            overlap_width_ratio_slider = gr.Slider(
+                minimum=0.0,
+                maximum=1.0,
+                step=0.01,
+                value=0.2,
+                label="Overlap Width Ratio",
+            )
+            postprocess_type_dropdown = gr.Dropdown(
+                ["NMS", "GREEDYNMM"],
+                type="value",
+                value="NMS",
+                label="Postprocess Type",
+            )
+            postprocess_match_metric_dropdown = gr.Dropdown(
+                ["IOU", "IOS"], type="value", value="IOU", label="Postprocess Match Metric"
+            )
+            postprocess_match_threshold_slider = gr.Slider(
+                minimum=0.0,
+                maximum=1.0,
+                step=0.01,
+                value=0.5,
+                label="Postprocess Match Threshold",
+            )
+            # NOTE(review): the UI default here is True, while the
+            # sahi_yolo_inference signature defaults this argument to False.
+            postprocess_class_agnostic_checkbox = gr.Checkbox(value=True, label="Postprocess Class Agnostic")
+            submit_button = gr.Button("Run Inference")
+        with gr.Column():
+            output_standard = gr.Image(type="pil", label="YOLOv11 Standard")
+            output_sahi_sliced = gr.Image(type="pil", label="YOLOv11 + SAHI Sliced")
+    # Each example row is positional: its twelve values line up one-to-one
+    # with the components listed in `inputs` below.
+    gr.Examples(
+        examples=[
+            ["apple_tree.jpg", "yolo11s.pt", 0.5, 300, 256, 256, 0.2, 0.2, "NMS", "IOU", 0.4, True],
+            ["highway.jpg", "yolo11s.pt", 0.5, 300, 256, 256, 0.2, 0.2, "NMS", "IOU", 0.4, True],
+            ["highway2.jpg", "yolo11s.pt", 0.5, 300, 512, 512, 0.2, 0.2, "NMS", "IOU", 0.4, True],
+            ["highway3.jpg", "yolo11s.pt", 0.5, 300, 512, 512, 0.2, 0.2, "NMS", "IOU", 0.4, True],
+        ],
+        inputs=[
+            original_image_input,
+            yolo_model_dropdown,
+            confidence_threshold_slider,
+            max_detections_slider,
+            slice_height_input,
+            slice_width_input,
+            overlap_height_ratio_slider,
+            overlap_width_ratio_slider,
+            postprocess_type_dropdown,
+            postprocess_match_metric_dropdown,
+            postprocess_match_threshold_slider,
+            postprocess_class_agnostic_checkbox,
+        ],
+        outputs=[output_standard, output_sahi_sliced],
+        fn=sahi_yolo_inference,
+        # NOTE(review): presumably makes Gradio precompute and store the
+        # example outputs by running fn on each row - confirm in Gradio docs.
+        cache_examples=True,
+    )
+    # The button runs the same function with the same input order as the examples.
+    submit_button.click(
+        fn=sahi_yolo_inference,
+        inputs=[
+            original_image_input,
+            yolo_model_dropdown,
+            confidence_threshold_slider,
+            max_detections_slider,
+            slice_height_input,
+            slice_width_input,
+            overlap_height_ratio_slider,
+            overlap_width_ratio_slider,
+            postprocess_type_dropdown,
+            postprocess_match_metric_dropdown,
+            postprocess_match_threshold_slider,
+            postprocess_class_agnostic_checkbox,
+        ],
+        outputs=[output_standard, output_sahi_sliced],
+    )
+# NOTE(review): mcp_server=True presumably also exposes the app's functions
+# over MCP (per the commit title) - confirm against the Gradio MCP guide.
+app.launch(mcp_server=True)