Update app.py

app.py CHANGED
@@ -3,6 +3,7 @@ import cv2
 import torch
 import numpy as np
 from PIL import Image
+from collections import Counter
 
 # Load the YOLOv5 model
 model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
@@ -21,27 +22,68 @@ def run_inference(image):
 
     return Image.fromarray(annotated_image)
 
-# Function to generate a summary for the detected objects
-def generate_summary(image):
+# Function to generate a summary for the detected objects with counts
+def generate_summary_with_counts(image):
     results = model(image)
     detected_objects = results.pandas().xyxy[0]
+
+    # Count detected objects
+    object_names = detected_objects['name'].tolist()
+    object_counts = Counter(object_names)
+
+    # Create a summary
     summary = "Detected objects:\n\n"
-    for obj in detected_objects['name']:
-        summary += f"- {obj}\n"
-    return summary
+    for obj, count in object_counts.items():
+        summary += f"- {obj}: {count}\n"
 
-
-def generate_scene_description(summary):
-    if "person" in summary.lower():
-        return "This scene might involve people interacting or a social gathering."
-    elif "car" in summary.lower() or "truck" in summary.lower():
-        return "This could be a street scene or a transportation-related scenario."
-    elif "dog" in summary.lower() or "cat" in summary.lower():
-        return "This appears to involve pets or animals, possibly in a domestic or outdoor setting."
-    else:
-        return "This scene involves various objects. It could be a dynamic or static environment."
+    return summary, object_counts
 
-# Create the Gradio interface
+# Function to generate a scene description based on the detected objects
+def generate_scene_description(object_counts):
+    """
+    Generate a possible scene description based on detected objects and their counts.
+    """
+    if "person" in object_counts and "dog" in object_counts:
+        return "This scene seems to capture people spending time outdoors with pets, possibly in a park or recreational area."
+    elif "person" in object_counts and "laptop" in object_counts:
+        return "This might be a workplace or a study environment, featuring individuals using laptops for work or study."
+    elif "car" in object_counts or "truck" in object_counts:
+        return "This appears to be a street or traffic scene with vehicles in motion or parked."
+    elif "cat" in object_counts and "sofa" in object_counts:
+        return "This scene seems to capture a cozy indoor environment, likely a home with pets relaxing."
+    elif "bicycle" in object_counts and "person" in object_counts:
+        return "This could depict an outdoor activity, such as cycling or commuting by bike."
+    elif "boat" in object_counts or "ship" in object_counts:
+        return "This seems to be a water-based setting, possibly near a harbor, river, or open sea."
+    elif "bird" in object_counts and "tree" in object_counts:
+        return "This scene depicts a natural setting, possibly a park or forest, with birds and trees."
+    elif "person" in object_counts and "microwave" in object_counts:
+        return "This is likely an indoor setting, such as a kitchen, where cooking or meal preparation is taking place."
+    elif "cow" in object_counts or "sheep" in object_counts:
+        return "This scene appears to capture a rural or farming environment, featuring livestock in open fields or farms."
+    elif "horse" in object_counts and "person" in object_counts:
+        return "This might depict an equestrian scene, possibly involving horseback riding or ranch activities."
+    elif "dog" in object_counts and "ball" in object_counts:
+        return "This scene seems to show playful activities, possibly a game of fetch with a dog."
+    elif "umbrella" in object_counts and "person" in object_counts:
+        return "This might capture a rainy day or a sunny outdoor activity where umbrellas are being used."
+    elif "train" in object_counts or "railway" in object_counts:
+        return "This scene could involve a railway station or a train passing through a scenic route."
+    elif "surfboard" in object_counts and "person" in object_counts:
+        return "This is likely a beach or coastal scene featuring activities like surfing or water sports."
+    elif "book" in object_counts and "person" in object_counts:
+        return "This scene could depict a quiet reading environment, such as a library or a study room."
+    elif "traffic light" in object_counts and "car" in object_counts:
+        return "This seems to capture an urban street scene with traffic and signals controlling the flow."
+    elif "chair" in object_counts and "dining table" in object_counts:
+        return "This is likely an indoor dining area, possibly a family meal or a restaurant setting."
+    elif "flower" in object_counts and "person" in object_counts:
+        return "This scene could depict a garden or a floral setting, possibly involving gardening or photography."
+    elif "airplane" in object_counts:
+        return "This appears to capture an airport or an aerial view, featuring an airplane in flight or on the ground."
+    else:
+        return "This scene involves various objects, indicating a dynamic or diverse setting."
+# Create the Gradio interface with enhanced UI
 with gr.Blocks(css="""
 body {
     font-family: 'Poppins', sans-serif;
@@ -76,19 +118,19 @@ with gr.Blocks(css="""
 .gr-button {
     font-size: 1em;
     padding: 12px 24px;
-    background: …
+    background: linear-gradient(90deg, #7091E6, #8697C4);
     color: #FFFFFF;
     border: none;
     border-radius: 5px;
     transition: all 0.3s ease-in-out;
 }
 .gr-button:hover {
-    background: …
+    background: linear-gradient(90deg, #8697C4, #7091E6);
     transform: scale(1.05);
     box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2);
 }
 .gr-box {
-    background: rgba(255, 255, 255, 0.…);
+    background: rgba(255, 255, 255, 0.2);
     border: 1px solid rgba(255, 255, 255, 0.3);
     border-radius: 10px;
     padding: 15px;
@@ -105,14 +147,14 @@ with gr.Blocks(css="""
         detect_button = gr.Button("Run Detection", elem_classes="gr-button")
     with gr.Column(scale=3):
         annotated_image_output = gr.Image(label="Detected Image", type="pil", elem_classes="gr-box")
-        summary_output = gr.Textbox(label="Detection Summary", lines=10, interactive=False, elem_classes="gr-box")
+        summary_output = gr.Textbox(label="Detection Summary with Object Counts", lines=10, interactive=False, elem_classes="gr-box")
         scene_description_output = gr.Textbox(label="Scene Description", lines=5, interactive=False, elem_classes="gr-box")
 
     # Actions for buttons
     def detect_and_process(image):
         annotated_image = run_inference(image)
-        summary = generate_summary(np.array(image))
-        scene_description = generate_scene_description(summary)
+        summary, object_counts = generate_summary_with_counts(np.array(image))
+        scene_description = generate_scene_description(object_counts)
         return annotated_image, summary, scene_description
 
     detect_button.click(
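The new summary logic is just a Counter over the class names YOLOv5 reports, so it can be exercised without loading the model. A minimal standalone sketch; the helper name summarize_names and the sample detection list are hypothetical, for illustration only:

from collections import Counter

def summarize_names(object_names):
    # Tally detections per class name, mirroring generate_summary_with_counts
    object_counts = Counter(object_names)
    summary = "Detected objects:\n\n"
    for obj, count in object_counts.items():
        summary += f"- {obj}: {count}\n"
    return summary, object_counts

# Hypothetical detections, shaped like results.pandas().xyxy[0]['name'].tolist()
summary, counts = summarize_names(["person", "dog", "person", "bicycle"])
print(summary)                                 # "- person: 2" etc.
print("person" in counts and "dog" in counts)  # True, so the park/pets branch fires

Since generate_scene_description only tests membership with "in", the counts affect the summary text but not which scene rule fires.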
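The final hunk stops at the opening of detect_button.click(, so the input/output wiring sits outside the diff. A plausible completion, assuming the input component is a gr.Image named image_input (not shown in any hunk):

detect_button.click(
    fn=detect_and_process,
    inputs=image_input,  # assumed gr.Image input component, not visible in the diff
    outputs=[annotated_image_output, summary_output, scene_description_output],
)

One design note: detect_and_process as updated runs the model twice per click, once inside run_inference and once inside generate_summary_with_counts; passing a single results object between the two would halve the inference cost.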