Update app.py

app.py CHANGED
@@ -3,6 +3,7 @@ import cv2
 import torch
 import numpy as np
 from PIL import Image
+from collections import Counter
 
 # Load the YOLOv5 model
 model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
@@ -21,27 +22,68 @@ def run_inference(image):
 
     return Image.fromarray(annotated_image)
 
-# Function to generate a summary for the detected objects
-def generate_summary(image):
+# Function to generate a summary for the detected objects with counts
+def generate_summary_with_counts(image):
     results = model(image)
     detected_objects = results.pandas().xyxy[0]
+
+    # Count detected objects
+    object_names = detected_objects['name'].tolist()
+    object_counts = Counter(object_names)
+
+    # Create a summary
     summary = "Detected objects:\n\n"
-    for obj in detected_objects['name']:
-        summary += f"- {obj}\n"
-    return summary
+    for obj, count in object_counts.items():
+        summary += f"- {obj}: {count}\n"
 
-
-def generate_scene_description(summary):
-    if "person" in summary.lower():
-        return "This scene might involve people interacting or a social gathering."
-    elif "car" in summary.lower() or "truck" in summary.lower():
-        return "This could be a street scene or a transportation-related scenario."
-    elif "dog" in summary.lower() or "cat" in summary.lower():
-        return "This appears to involve pets or animals, possibly in a domestic or outdoor setting."
-    else:
-        return "This scene involves various objects. It could be a dynamic or static environment."
+    return summary, object_counts
 
-# Create the Gradio interface
+# Function to generate a scene description based on the detected objects
+def generate_scene_description(object_counts):
+    """
+    Generate a possible scene description based on detected objects and their counts.
+    """
+    if "person" in object_counts and "dog" in object_counts:
+        return "This scene seems to capture people spending time outdoors with pets, possibly in a park or recreational area."
+    elif "person" in object_counts and "laptop" in object_counts:
+        return "This might be a workplace or a study environment, featuring individuals using laptops for work or study."
+    elif "car" in object_counts or "truck" in object_counts:
+        return "This appears to be a street or traffic scene with vehicles in motion or parked."
+    elif "cat" in object_counts and "sofa" in object_counts:
+        return "This scene seems to capture a cozy indoor environment, likely a home with pets relaxing."
+    elif "bicycle" in object_counts and "person" in object_counts:
+        return "This could depict an outdoor activity, such as cycling or commuting by bike."
+    elif "boat" in object_counts or "ship" in object_counts:
+        return "This seems to be a water-based setting, possibly near a harbor, river, or open sea."
+    elif "bird" in object_counts and "tree" in object_counts:
+        return "This scene depicts a natural setting, possibly a park or forest, with birds and trees."
+    elif "person" in object_counts and "microwave" in object_counts:
+        return "This is likely an indoor setting, such as a kitchen, where cooking or meal preparation is taking place."
+    elif "cow" in object_counts or "sheep" in object_counts:
+        return "This scene appears to capture a rural or farming environment, featuring livestock in open fields or farms."
+    elif "horse" in object_counts and "person" in object_counts:
+        return "This might depict an equestrian scene, possibly involving horseback riding or ranch activities."
+    elif "dog" in object_counts and "ball" in object_counts:
+        return "This scene seems to show playful activities, possibly a game of fetch with a dog."
+    elif "umbrella" in object_counts and "person" in object_counts:
+        return "This might capture a rainy day or a sunny outdoor activity where umbrellas are being used."
+    elif "train" in object_counts or "railway" in object_counts:
+        return "This scene could involve a railway station or a train passing through a scenic route."
+    elif "surfboard" in object_counts and "person" in object_counts:
+        return "This is likely a beach or coastal scene featuring activities like surfing or water sports."
+    elif "book" in object_counts and "person" in object_counts:
+        return "This scene could depict a quiet reading environment, such as a library or a study room."
+    elif "traffic light" in object_counts and "car" in object_counts:
+        return "This seems to capture an urban street scene with traffic and signals controlling the flow."
+    elif "chair" in object_counts and "dining table" in object_counts:
+        return "This is likely an indoor dining area, possibly a family meal or a restaurant setting."
+    elif "flower" in object_counts and "person" in object_counts:
+        return "This scene could depict a garden or a floral setting, possibly involving gardening or photography."
+    elif "airplane" in object_counts:
+        return "This appears to capture an airport or an aerial view, featuring an airplane in flight or on the ground."
+    else:
+        return "This scene involves various objects, indicating a dynamic or diverse setting."
+# Create the Gradio interface with enhanced UI
 with gr.Blocks(css="""
 body {
     font-family: 'Poppins', sans-serif;
@@ -76,19 +118,19 @@ with gr.Blocks(css="""
 .gr-button {
     font-size: 1em;
     padding: 12px 24px;
-    background: …
+    background: linear-gradient(90deg, #7091E6, #8697C4);
     color: #FFFFFF;
     border: none;
     border-radius: 5px;
     transition: all 0.3s ease-in-out;
 }
 .gr-button:hover {
-    background: …
+    background: linear-gradient(90deg, #8697C4, #7091E6);
     transform: scale(1.05);
     box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2);
 }
 .gr-box {
-    background: rgba(255, 255, 255, 0.…);
+    background: rgba(255, 255, 255, 0.2);
     border: 1px solid rgba(255, 255, 255, 0.3);
     border-radius: 10px;
     padding: 15px;
@@ -105,14 +147,14 @@ with gr.Blocks(css="""
         detect_button = gr.Button("Run Detection", elem_classes="gr-button")
     with gr.Column(scale=3):
         annotated_image_output = gr.Image(label="Detected Image", type="pil", elem_classes="gr-box")
-        summary_output = gr.Textbox(label="Detection Summary", lines=10, interactive=False, elem_classes="gr-box")
+        summary_output = gr.Textbox(label="Detection Summary with Object Counts", lines=10, interactive=False, elem_classes="gr-box")
         scene_description_output = gr.Textbox(label="Scene Description", lines=5, interactive=False, elem_classes="gr-box")
 
     # Actions for buttons
     def detect_and_process(image):
         annotated_image = run_inference(image)
-        summary = generate_summary(np.array(image))
-        scene_description = generate_scene_description(summary)
+        summary, object_counts = generate_summary_with_counts(np.array(image))
+        scene_description = generate_scene_description(object_counts)
         return annotated_image, summary, scene_description
 
     detect_button.click(
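The new summary logic is just a Counter over the class names YOLOv5 reports, so it can be exercised without loading the model. A minimal standalone sketch; the helper name summarize_names and the sample detection list are hypothetical, for illustration only:

from collections import Counter

def summarize_names(object_names):
    # Tally detections per class name, mirroring generate_summary_with_counts
    object_counts = Counter(object_names)
    summary = "Detected objects:\n\n"
    for obj, count in object_counts.items():
        summary += f"- {obj}: {count}\n"
    return summary, object_counts

# Hypothetical detections, shaped like results.pandas().xyxy[0]['name'].tolist()
summary, counts = summarize_names(["person", "dog", "person", "bicycle"])
print(summary)                                 # "- person: 2" etc.
print("person" in counts and "dog" in counts)  # True, so the park/pets branch fires

Since generate_scene_description only tests membership with "in", the counts affect the summary text but not which scene rule fires.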
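The final hunk stops at the opening of detect_button.click(, so the input/output wiring sits outside the diff. A plausible completion, assuming the input component is a gr.Image named image_input (not shown in any hunk):

detect_button.click(
    fn=detect_and_process,
    inputs=image_input,  # assumed gr.Image input component, not visible in the diff
    outputs=[annotated_image_output, summary_output, scene_description_output],
)

One design note: detect_and_process as updated runs the model twice per click, once inside run_inference and once inside generate_summary_with_counts; passing a single results object between the two would halve the inference cost.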