Update app.py
Browse files
app.py
CHANGED
@@ -23,7 +23,7 @@ model_version = int(os.getenv("ROBOFLOW_MODEL_VERSION"))
|
|
23 |
|
24 |
# DINO-X Config
|
25 |
DINOX_API_KEY = os.getenv("DINO_X_API_KEY")
|
26 |
-
DINOX_PROMPT = "
|
27 |
|
28 |
# Initialize the model
|
29 |
rf = Roboflow(api_key=rf_api_key)
|
@@ -69,7 +69,8 @@ def detect_combined(image):
|
|
69 |
competitor_boxes = []
|
70 |
for obj in dinox_pred:
|
71 |
dinox_box = obj.bbox
|
72 |
-
|
|
|
73 |
class_name = obj.category.strip().lower() # Normalisasi nama kelas
|
74 |
competitor_class_count[class_name] = competitor_class_count.get(class_name, 0) + 1
|
75 |
competitor_boxes.append({
|
@@ -153,6 +154,9 @@ def convert_video_to_mp4(input_path, output_path):
|
|
153 |
def detect_objects_in_video(video_path):
|
154 |
temp_output_path = "/tmp/output_video.mp4"
|
155 |
temp_frames_dir = tempfile.mkdtemp()
|
|
|
|
|
|
|
156 |
|
157 |
try:
|
158 |
# Convert video to MP4 if necessary
|
@@ -167,7 +171,6 @@ def detect_objects_in_video(video_path):
|
|
167 |
frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
|
168 |
frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
169 |
frame_size = (frame_width, frame_height)
|
170 |
-
frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
|
171 |
|
172 |
# VideoWriter for output video
|
173 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
@@ -185,11 +188,38 @@ def detect_objects_in_video(video_path):
|
|
185 |
# Process predictions for frame
|
186 |
predictions = yolo_model.predict(frame_path, confidence=60, overlap=80).json()
|
187 |
|
|
|
|
|
188 |
for prediction in predictions['predictions']:
|
189 |
-
x, y, w, h = prediction['x'], prediction['y'], prediction['width'], prediction['height']
|
190 |
class_name = prediction['class']
|
191 |
-
|
192 |
-
cv2.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
|
194 |
# Write processed frame to output video
|
195 |
output_video.write(frame)
|
@@ -198,7 +228,7 @@ def detect_objects_in_video(video_path):
|
|
198 |
video.release()
|
199 |
output_video.release()
|
200 |
|
201 |
-
return temp_output_path
|
202 |
|
203 |
except Exception as e:
|
204 |
return None, f"An error occurred: {e}"
|
@@ -226,10 +256,11 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", ne
|
|
226 |
input_video = gr.Video(label="Input Video")
|
227 |
detect_video_button = gr.Button("Detect Video")
|
228 |
output_video = gr.Video(label="Output Video")
|
|
|
229 |
detect_video_button.click(
|
230 |
fn=detect_objects_in_video,
|
231 |
inputs=input_video,
|
232 |
-
outputs=[output_video]
|
233 |
)
|
234 |
|
235 |
iface.launch()
|
|
|
23 |
|
24 |
# DINO-X Config
|
25 |
DINOX_API_KEY = os.getenv("DINO_X_API_KEY")
|
26 |
+
DINOX_PROMPT = "beverage . bottle" # Customize sesuai produk kompetitor : food . drink
|
27 |
|
28 |
# Initialize the model
|
29 |
rf = Roboflow(api_key=rf_api_key)
|
|
|
69 |
competitor_boxes = []
|
70 |
for obj in dinox_pred:
|
71 |
dinox_box = obj.bbox
|
72 |
+
# Filter out objects that were already detected by YOLO (overlap detection)
|
73 |
+
if not is_overlap(dinox_box, nestle_boxes): # Ignore if overlap with YOLO detections
|
74 |
class_name = obj.category.strip().lower() # Normalisasi nama kelas
|
75 |
competitor_class_count[class_name] = competitor_class_count.get(class_name, 0) + 1
|
76 |
competitor_boxes.append({
|
|
|
154 |
def detect_objects_in_video(video_path):
|
155 |
temp_output_path = "/tmp/output_video.mp4"
|
156 |
temp_frames_dir = tempfile.mkdtemp()
|
157 |
+
all_class_count = {} # To store cumulative counts for all frames
|
158 |
+
nestle_total = 0 # Total Nestlé count
|
159 |
+
frame_count = 0
|
160 |
|
161 |
try:
|
162 |
# Convert video to MP4 if necessary
|
|
|
171 |
frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
|
172 |
frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
173 |
frame_size = (frame_width, frame_height)
|
|
|
174 |
|
175 |
# VideoWriter for output video
|
176 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
|
|
188 |
# Process predictions for frame
|
189 |
predictions = yolo_model.predict(frame_path, confidence=60, overlap=80).json()
|
190 |
|
191 |
+
# Update class count for this frame
|
192 |
+
frame_class_count = {}
|
193 |
for prediction in predictions['predictions']:
|
|
|
194 |
class_name = prediction['class']
|
195 |
+
frame_class_count[class_name] = frame_class_count.get(class_name, 0) + 1
|
196 |
+
cv2.rectangle(frame, (int(prediction['x'] - prediction['width']/2),
|
197 |
+
int(prediction['y'] - prediction['height']/2)),
|
198 |
+
(int(prediction['x'] + prediction['width']/2),
|
199 |
+
int(prediction['y'] + prediction['height']/2)),
|
200 |
+
(0, 255, 0), 2)
|
201 |
+
cv2.putText(frame, class_name, (int(prediction['x'] - prediction['width']/2),
|
202 |
+
int(prediction['y'] - prediction['height']/2 - 10)),
|
203 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
|
204 |
+
|
205 |
+
# Update cumulative count for all frames
|
206 |
+
for class_name, count in frame_class_count.items():
|
207 |
+
all_class_count[class_name] = all_class_count.get(class_name, 0) + count
|
208 |
+
|
209 |
+
# Update the total Nestlé product count
|
210 |
+
nestle_total = sum(all_class_count.values())
|
211 |
+
|
212 |
+
# Create a vertical layout for counts (dynamically updated)
|
213 |
+
count_text = "Cumulative Object Counts\n"
|
214 |
+
for class_name, count in all_class_count.items():
|
215 |
+
count_text += f"{class_name}: {count}\n"
|
216 |
+
count_text += f"\nTotal Product Nestlé: {nestle_total}"
|
217 |
+
|
218 |
+
# Overlay the counts text onto the frame
|
219 |
+
y_offset = 20
|
220 |
+
for line in count_text.split("\n"):
|
221 |
+
cv2.putText(frame, line, (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
|
222 |
+
y_offset += 30 # Move down for next line
|
223 |
|
224 |
# Write processed frame to output video
|
225 |
output_video.write(frame)
|
|
|
228 |
video.release()
|
229 |
output_video.release()
|
230 |
|
231 |
+
return temp_output_path, count_text
|
232 |
|
233 |
except Exception as e:
|
234 |
return None, f"An error occurred: {e}"
|
|
|
256 |
input_video = gr.Video(label="Input Video")
|
257 |
detect_video_button = gr.Button("Detect Video")
|
258 |
output_video = gr.Video(label="Output Video")
|
259 |
+
output_video_text = gr.Textbox(label="Counting Object")
|
260 |
detect_video_button.click(
|
261 |
fn=detect_objects_in_video,
|
262 |
inputs=input_video,
|
263 |
+
outputs=[output_video, output_video_text]
|
264 |
)
|
265 |
|
266 |
iface.launch()
|