Spaces:

atalaydenknalbant
/

ASL-Detector-YOLO

Running on Zero

App Files Files Community

atalaydenknalbant committed on Sep 5, 2024

Commit

cb1fbea

verified ·

1 Parent(s): a6ae674

Update app.py

Browse files

Files changed (1) hide show

app.py +135 -92

app.py CHANGED Viewed

@@ -1,104 +1,147 @@
-# Import libraries
-import cv2
 from ultralytics import YOLO
 import gradio as gr
-import spaces
-# Bounding-box color for each ASL letter class id (0 = A ... 25 = Z).
-# The palette holds 11 distinct colors and repeats from class id 11 onwards,
-# so several letters share a color (e.g. A, L and W).
-ASL_COLORS = {
-    0: (191, 100, 21),   # A
-    1: (2, 62, 115),     # B
-    2: (140, 80, 58),    # C
-    3: (168, 181, 69),   # D
-    4: (2, 69, 84),      # E
-    5: (83, 115, 106),   # F
-    6: (255, 72, 88),    # G
-    7: (0, 204, 192),    # H
-    8: (116, 127, 127),  # I
-    9: (0, 153, 221),    # J
-    10: (196, 51, 2),    # K
-    11: (191, 100, 21),  # L
-    12: (2, 62, 115),    # M
-    13: (140, 80, 58),   # N
-    14: (168, 181, 69),  # O
-    15: (2, 69, 84),     # P
-    16: (83, 115, 106),  # Q
-    17: (255, 72, 88),   # R
-    18: (0, 204, 192),   # S
-    19: (116, 127, 127), # T
-    20: (0, 153, 221),   # U
-    21: (196, 51, 2),    # V
-    22: (191, 100, 21),  # W
-    23: (2, 62, 115),    # X
-    24: (140, 80, 58),   # Y
-    25: (168, 181, 69)   # Z
-}
-# Padding added around the label text inside its filled background box
-BOX_PADDING = 2
-# Load the model from the local directory
-DETECTION_MODEL = YOLO("yolov10s.pt")
 @spaces.GPU
-def detect(image_path):
-    """
-    Read an image from disk, run the detector on it and draw the results.
-
-    Every detected box is outlined in its class color and captioned with the
-    ASL letter and the confidence score.
-
-    Args:
-    - image_path: Path to the image file
-    Return: image with bounding boxes and labels drawn; the unannotated image
-    when nothing is detected; None when the image could not be loaded
-    """
-    # Load the image
-    image = cv2.imread(image_path)
-    # Nothing to annotate if loading failed; the None is passed straight back
-    if image is None:
-        return image
-    # Predict on image
-    results = DETECTION_MODEL.predict(source=image, conf=0.2, iou=0.8)  # Predict on image
-    boxes = results[0].boxes  # Get bounding boxes
-    # No detections: return the image untouched
-    if len(boxes) == 0:
-        return image
-    # Draw bounding boxes and labels
-    for box in boxes:
-        detection_class_conf = round(box.conf.item(), 2)  # Confidence score
-        class_id = int(box.cls.item())  # Get class ID
-        # Get start and end points of the bounding box
-        start_box = (int(box.xyxy[0][0]), int(box.xyxy[0][1]))
-        end_box = (int(box.xyxy[0][2]), int(box.xyxy[0][3]))
-        # Draw bounding box; line thickness grows with the mean of the image
-        # height and width (0.1% of it, plus 1)
-        line_thickness = round(0.001 * (image.shape[0] + image.shape[1]) / 2) + 1
-        image = cv2.rectangle(img=image,
-                              pt1=start_box,
-                              pt2=end_box,
-                              color=ASL_COLORS[class_id],
-                              thickness=line_thickness)
-        # Draw label
-        asl_letter = chr(65 + class_id)  # Convert class ID to ASL letter (0 -> 'A')
-        text = f"{asl_letter} {detection_class_conf:.2f}"  # Label text
-        font_thickness = max(line_thickness - 1, 1)
-        # NOTE(review): font_scale_h is assigned but never used below
-        (font_scale_w, font_scale_h) = (line_thickness * 0.5, line_thickness * 0.5)
-        # NOTE(review): the text is measured with fontFace=2 but drawn with
-        # fontFace=0 further down, so the background box may not match the
-        # rendered text exactly - confirm which face is intended
-        (text_w, text_h), _ = cv2.getTextSize(text=text, fontFace=2, fontScale=font_scale_w, thickness=font_thickness)
-        # Draw wrapping box for text, sitting directly above the top-left
-        # corner of the bounding box (thickness=-1 fills the rectangle)
-        image = cv2.rectangle(img=image,
-                              pt1=(start_box[0], start_box[1] - text_h - BOX_PADDING * 2),
-                              pt2=(start_box[0] + text_w + BOX_PADDING * 2, start_box[1]),
-                              color=ASL_COLORS[class_id],
-                              thickness=-1)
-        # Put class name on image, in white, inset by the padding
-        start_text = (start_box[0] + BOX_PADDING, start_box[1] - BOX_PADDING)
-        image = cv2.putText(img=image, text=text, org=start_text, fontFace=0, color=(255, 255, 255), fontScale=font_scale_w, thickness=font_thickness)
-    return image
-# Gradio interface: the uploaded image is handed to detect() as a file path
-# (type="filepath") and the image it returns is displayed as the output
-iface = gr.Interface(fn=detect,
-                     inputs=gr.Image(label="Upload ASL letter image", type="filepath"),
-                     outputs="image")
-# Launch the interface
-iface.launch()

+import spaces
+import supervision as sv
+import PIL.Image as Image
 from ultralytics import YOLO
+from huggingface_hub import hf_hub_download
 import gradio as gr
+# Hugging Face Hub repository the model weight files are downloaded from.
+# (A `global` statement is not needed here: names assigned at module level
+# are already module globals.)
+repo_id = "atalaydenknalbant/asl-yolo-models"
+# Model filenames directly provided, since they are known
+model_filenames = [
+    "yolov10s.pt",
+    "yolov10x.pt",
+    "yolov8s.pt",
+    "yolov8x.pt",
+    "yolov9e.pt",
+    "yolov9s.pt"
+]
+def download_models(repo_id, model_id):
+    """Download one model weight file from the Hub into the working directory.
+
+    Args:
+        repo_id: Hugging Face Hub repository to download from.
+        model_id: Filename of the weight file inside that repository.
+
+    Returns:
+        The relative path "./<model_id>" of the downloaded file.
+    """
+    # local_dir="./" asks the Hub client to place the file in the working
+    # directory, which is where the returned path points.
+    hf_hub_download(repo_id, filename=model_id, local_dir="./")
+    return f"./{model_id}"
+# Shared annotator used to draw the detections onto the output image.
+box_annotator = sv.BoxAnnotator()
+# Class id -> letter lookup: 0 maps to 'A', 1 to 'B', ... 25 to 'Z'.
+category_dict = {index: chr(ord('A') + index) for index in range(26)}
 @spaces.GPU
+def yolo_inference(image, model_id, conf_threshold, iou_threshold, max_detection):
+    """Run the selected YOLO model on an image and return the annotated image.
+
+    Args:
+        image: Input image; the UI supplies a PIL image (gr.Image(type="pil")).
+        model_id: Weight filename to download from ``repo_id`` and load.
+        conf_threshold: Confidence threshold, forwarded to the model as ``conf``.
+        iou_threshold: IoU threshold, forwarded to the model as ``iou``.
+        max_detection: Maximum number of detections, forwarded as ``max_det``.
+
+    Returns:
+        The image annotated with one "<letter> <confidence>" label per detection.
+
+    Raises:
+        gr.Error: If no image was provided.
+    """
+    # Pressing the button with an empty image component passes None; surface a
+    # readable message instead of failing somewhere inside inference/annotation.
+    if image is None:
+        raise gr.Error("Please provide an image before running detection.")
+    # Download models
+    model_path = download_models(repo_id, model_id)
+    model = YOLO(model_path)
+    # max_det is a count; coerce in case the slider delivers it as a float.
+    results = model(
+        source=image,
+        imgsz=640,
+        iou=iou_threshold,
+        conf=conf_threshold,
+        verbose=False,
+        max_det=int(max_detection),
+    )[0]
+    detections = sv.Detections.from_ultralytics(results)
+    # One caption per detection: the letter for the class id plus the confidence.
+    labels = [
+        f"{category_dict[class_id]} {confidence:.2f}"
+        for class_id, confidence in zip(detections.class_id, detections.confidence)
+    ]
+    # NOTE(review): whether BoxAnnotator.annotate accepts `labels` depends on the
+    # installed supervision version - confirm against the pinned release.
+    annotated_image = box_annotator.annotate(image, detections=detections, labels=labels)
+    return annotated_image
+def app():
+    """Build the detector UI and wire it to ``yolo_inference``.
+
+    Creates an input column (image, model dropdown, confidence / IoU /
+    max-detection sliders and the detect button), an output column with the
+    annotated image, the button's click handler, and three lazily cached
+    examples that run through the same function.
+    """
+    with gr.Blocks():
+        with gr.Row():
+            with gr.Column():
+                image = gr.Image(type="pil", label="Image", interactive=True)
+                # Preselect the first known model, or nothing if the list is empty.
+                default_model = model_filenames[0] if model_filenames else ""
+                model_id = gr.Dropdown(
+                    label="Model", choices=model_filenames, value=default_model
+                )
+                conf_threshold = gr.Slider(
+                    label="Confidence Threshold",
+                    minimum=0.1, maximum=1.0, step=0.1, value=0.25,
+                )
+                iou_threshold = gr.Slider(
+                    label="IoU Threshold",
+                    minimum=0.1, maximum=1.0, step=0.1, value=0.45,
+                )
+                max_detection = gr.Slider(
+                    label="Max Detection", minimum=1, step=1, value=1
+                )
+                yolov_infer = gr.Button(value="Detect Objects")
+            with gr.Column():
+                output_image = gr.Image(type="pil", label="Annotated Image", interactive=False)
+
+        # The button and the examples feed the same components, in the order
+        # of yolo_inference's parameters.
+        inference_inputs = [image, model_id, conf_threshold, iou_threshold, max_detection]
+        inference_outputs = [output_image]
+
+        yolov_infer.click(
+            fn=yolo_inference,
+            inputs=inference_inputs,
+            outputs=inference_outputs,
+        )
+
+        # Each row: image file, model, confidence, IoU, max detections.
+        example_rows = [
+            ["b.jpg", "yolov10x.pt", 0.25, 0.45, 1],
+            ["a.jpg", "yolov10s.pt", 0.25, 0.45, 1],
+            ["y.jpg", "yolov10x.pt", 0.25, 0.45, 1],
+        ]
+        gr.Examples(
+            examples=example_rows,
+            fn=yolo_inference,
+            inputs=inference_inputs,
+            outputs=inference_outputs,
+            cache_examples="lazy",
+        )
+# Assemble the page - a centered title above the detector UI - and serve it.
+with gr.Blocks() as gradio_app:
+    gr.HTML(
+        """
+    <h1 style='text-align: center'>
+    YOLO Powered ASL(American Sign Language) Letter Detector PSA: It can't detect J or Z
+    </h1>
+        """)
+    with gr.Row(), gr.Column():
+        app()
+gradio_app.launch(debug=True)