muhammadsalmanalfaridzi's picture
Update app.py
0f3d01e verified
raw
history blame
4.43 kB
import gradio as gr
import numpy as np
from vision_agent.tools import *
from pillow_heif import register_heif_opener
from typing import Dict
# Register HEIF opener
register_heif_opener()
import vision_agent as va
def analyze_mixed_boxes(image) -> Dict:
    """
    Detect water bottles and beverage cans in an image and summarize them per shelf.

    1) Converts the uploaded image to a NumPy array.
    2) Uses the 'countgd_object_detection' tool with the prompt
       'water bottle, beverage can' to detect items.
    3) Splits detections into a top shelf and bottom shelf by comparing each
       detection's vertical center to the image's vertical midpoint.
    4) Counts water bottles and beverage cans on each shelf and overall, and
       computes the average confidence score per label.
    5) Overlays bounding boxes on the image with 'overlay_bounding_boxes' and
       converts the annotated array back to a PIL image.
    6) Returns a dictionary summarizing the distribution.

    Parameters:
        image (PIL.Image): The uploaded image.

    Returns:
        dict: Summary of the analysis with keys:
            - total_items (int): total number of detections (any label)
            - total_water_bottles (int): count of detections labelled 'water bottle'
            - total_beverage_cans (int): count of detections labelled 'beverage can'
            - top_shelf (dict): counts of bottles and cans on the top shelf
            - bottom_shelf (dict): counts of bottles and cans on the bottom shelf
            - confidence (dict): average score per label, rounded to 2 decimals
              (0 when a label has no detections)
            - annotated_image (PIL.Image): the image with bounding boxes drawn

    Raises:
        ValueError: If no image was provided (image is None).
    """
    # Imported locally so the name is guaranteed to be bound: none of the
    # file's visible imports binds `Image` explicitly.
    from PIL import Image

    if image is None:
        # Without this guard np.array(None) yields a 0-d array and the shape
        # lookup below fails with an unhelpful error.
        raise ValueError("No image provided: upload an image before running the analysis.")

    bottle_label = "water bottle"
    can_label = "beverage can"
    tracked_labels = (bottle_label, can_label)

    # Convert the uploaded image to a numpy array
    pixels = np.array(image)
    height = pixels.shape[0]

    # Detect water bottles and beverage cans
    detections = countgd_object_detection("water bottle, beverage can", pixels)

    # Tally per-shelf counts and collect scores in a single pass.
    mid_height = height / 2
    shelf_counts = {
        "top": dict.fromkeys(tracked_labels, 0),
        "bottom": dict.fromkeys(tracked_labels, 0),
    }
    scores = {label: [] for label in tracked_labels}
    for det in detections:
        label = det["label"]
        if label not in scores:
            # Unexpected labels still count towards total_items below, but are
            # not attributed to either category (same as the per-label filters
            # this loop replaces).
            continue
        # NOTE(review): scaling by `height` assumes bbox is
        # [xmin, ymin, xmax, ymax] normalized to 0-1 -- confirm against the
        # detection tool's documentation.
        center_y = ((det["bbox"][1] + det["bbox"][3]) / 2) * height
        shelf = "top" if center_y < mid_height else "bottom"
        shelf_counts[shelf][label] += 1
        scores[label].append(det["score"])

    def average(values):
        # 0 (not NaN / ZeroDivisionError) when nothing was detected for a label.
        return sum(values) / len(values) if values else 0

    # Overlay bounding boxes, then convert back to PIL format for Gradio output
    annotated_image = overlay_bounding_boxes(pixels, detections)
    annotated_image_pil = Image.fromarray(annotated_image)

    return {
        "total_items": len(detections),
        "total_water_bottles": len(scores[bottle_label]),
        "total_beverage_cans": len(scores[can_label]),
        "top_shelf": {
            "water_bottles": shelf_counts["top"][bottle_label],
            "beverage_cans": shelf_counts["top"][can_label],
        },
        "bottom_shelf": {
            "water_bottles": shelf_counts["bottom"][bottle_label],
            "beverage_cans": shelf_counts["bottom"][can_label],
        },
        "confidence": {
            "water_bottles": round(average(scores[bottle_label]), 2),
            "beverage_cans": round(average(scores[can_label]), 2),
        },
        "annotated_image": annotated_image_pil,  # return annotated image for display
    }
# Gradio Interface
def _analyze_for_interface(image):
    """
    Adapt analyze_mixed_boxes to the two output components declared below.

    analyze_mixed_boxes returns a single dict that also carries the annotated
    image under the "annotated_image" key. The interface has two outputs (a
    JSON view and an image view), so the handler must return one value per
    component, and the JSON value must not contain the image object.

    Parameters:
        image (PIL.Image | None): The uploaded image, or None if nothing was uploaded.

    Returns:
        tuple: (summary dict without the image, annotated image or None).

    Raises:
        gr.Error: If no image was uploaded, so the message is shown in the UI.
    """
    if image is None:
        raise gr.Error("Please upload an image before submitting.")
    # Copy so the dict returned by the analysis function is not mutated.
    summary = dict(analyze_mixed_boxes(image))
    annotated = summary.pop("annotated_image", None)
    return summary, annotated


iface = gr.Interface(
    fn=_analyze_for_interface,
    inputs=gr.Image(type="pil"),  # allows image upload
    outputs=[gr.JSON(), gr.Image(type="pil")],  # display result and annotated image
    title="Beverage Detection Analysis",
    description="Upload an image containing water bottles and beverage cans, and the tool will analyze the distribution on shelves and display an annotated image.",
)
iface.launch()