Spaces:

techysanoj
/

live-object-detection

Runtime error

App Files Files Community

techysanoj committed on Dec 1, 2023

Commit

65f769b

1 Parent(s): b2f7aa2

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -34

app.py CHANGED Viewed

@@ -1,34 +1,14 @@
 import gradio as gr
 import cv2
 import torch
-from torchvision import transforms
 from PIL import Image
-# Load the pre-trained object detection model (replace with your own model)
-# For example, using a torchvision model for demonstration purposes
-model = torch.hub.load('pytorch/vision:v0.10.0', 'fasterrcnn_resnet50_fpn', pretrained=True)
 model.eval()
-# Define the transformations for the input image
-transform = transforms.Compose([
-    transforms.ToTensor(),
-])
-# Function to perform object detection on an image
-def detect_objects(image):
-    # Convert image to tensor
-    input_tensor = transform(image).unsqueeze(0)
-    # Perform object detection
-    with torch.no_grad():
-        predictions = model(input_tensor)
-    # Extract bounding boxes and labels from predictions
-    boxes = predictions[0]['boxes'].numpy()
-    labels = predictions[0]['labels'].numpy()
-    return boxes, labels
 # Function for live object detection from the camera
 def live_object_detection():
     # Open a connection to the camera (replace with your own camera setup)
@@ -41,14 +21,21 @@ def live_object_detection():
         # Convert the frame to PIL Image
         frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-        # Perform object detection
-        boxes, labels = detect_objects(frame_pil)
         # Draw bounding boxes on the frame
-        for box, label in zip(boxes, labels):
-            box = [int(coord) for coord in box]
             cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
-            cv2.putText(frame, f"Label: {label}", (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
         # Display the resulting frame
         cv2.imshow('Object Detection', frame)
@@ -63,11 +50,8 @@ def live_object_detection():
 # Define the Gradio interface
 iface = gr.Interface(
-    fn=[detect_objects, live_object_detection],
-    inputs=[
-        gr.Image(type="pil", label="Upload a photo for object detection"),
-        "webcam",
-    ],
     outputs="image",
     live=True,
 )

 import gradio as gr
 import cv2
 import torch
 from PIL import Image
+from transformers import DetrImageProcessor, DetrForObjectDetection
+# Load the pre-trained DETR model
+processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
+model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
 model.eval()
 # Function for live object detection from the camera
 def live_object_detection():
     # Open a connection to the camera (replace with your own camera setup)
         # Convert the frame to PIL Image
         frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        # Process the frame with the DETR model
+        inputs = processor(images=frame_pil, return_tensors="pt")
+        outputs = model(**inputs)
+        # convert outputs (bounding boxes and class logits) to COCO API
+        # let's only keep detections with score > 0.9
+        target_sizes = torch.tensor([frame_pil.size[::-1]])
+        results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
         # Draw bounding boxes on the frame
+        for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
+            box = [int(round(i)) for i in box.tolist()]
             cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
+            cv2.putText(frame, f"{model.config.id2label[label.item()]}: {round(score.item(), 3)}",
+                        (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
         # Display the resulting frame
         cv2.imshow('Object Detection', frame)
 # Define the Gradio interface
 iface = gr.Interface(
+    fn=live_object_detection,
+    inputs="webcam",
     outputs="image",
     live=True,
 )