# Author: Trisha Tomy
# init (commit 6a0e448)
import math
from typing import Any
import cv2
import numpy as np
from pydantic import BaseModel, Field, field_validator
class Point(BaseModel):
    """An integer 2-D coordinate supporting tuple-like unpacking and indexing."""

    x: int
    y: int

    def __iter__(self):
        # Enables unpacking: px, py = point
        yield self.x
        yield self.y

    def __getitem__(self, index) -> int:
        # Tuple semantics: 0 -> x, 1 -> y (negative indices work too).
        return (self.x, self.y)[index]

    def __tuple__(self) -> tuple[int, int]:
        # NOTE(review): not a real Python protocol method — tuple() never
        # calls it. Kept for callers that may invoke it explicitly.
        return (self.x, self.y)

    def __repr__(self) -> str:
        return f"Point(x={self.x}, y={self.y})"
class BoundingBox(BaseModel):
    """An axis-aligned labeled rectangle.

    Incoming coordinates are coerced to integers so the box only ever grows:
    left/top round down, right/bottom round up — the box never crops the
    element it encloses.
    """

    label: str = Field(..., description="The label that's given for this bounding box")
    left: int = Field(..., description="Left coordinate of the bounding box")
    right: int = Field(..., description="Right coordinate of the bounding box")
    top: int = Field(..., description="Top coordinate of the bounding box")
    bottom: int = Field(..., description="Bottom coordinate of the bounding box")

    @field_validator("left", "top", mode="before")
    @classmethod
    def round_down(cls, v):
        # Floor moves these edges outward (left/up), enlarging the box.
        return math.floor(float(v))

    @field_validator("right", "bottom", mode="before")
    @classmethod
    def round_up(cls, v):
        # Ceil moves these edges outward (right/down), enlarging the box.
        return math.ceil(float(v))
class POI(BaseModel):
    """A point of interest: an element's metadata, centroid, and bounding box."""

    # Arbitrary metadata about the element — schema is set by the caller.
    info: dict[str, Any]
    # Center point of the element.
    element_centroid: Point
    # Rectangle enclosing the element.
    bounding_box: BoundingBox
def calculate_dash_points(start, end, dash_length, gap_length):
    """Compute the dash endpoints for a dashed line from *start* to *end*.

    Args:
        start: (x, y) of the segment start.
        end: (x, y) of the segment end.
        dash_length: Length of each drawn dash, in pixels.
        gap_length: Gap between consecutive dashes, in pixels.

    Returns:
        A flat list of integer (x, y) points where consecutive pairs
        (points[2i], points[2i+1]) are the start and end of one dash.
        Empty list when start == end (no direction to draw in).
    """
    x1, y1 = start
    x2, y2 = end
    dx = x2 - x1
    dy = y2 - y1
    # math.hypot is the idiomatic scalar distance; np.sqrt here returned a
    # np.float64 for no benefit.
    dist = math.hypot(dx, dy)
    if dist == 0:
        return []
    unit_x = dx / dist
    unit_y = dy / dist
    dash_points = []
    current_dist = 0.0
    while current_dist < dist:
        # Clamp the last dash so it never overshoots the segment end.
        dash_end = min(current_dist + dash_length, dist)
        dash_points.append(
            (int(x1 + unit_x * current_dist), int(y1 + unit_y * current_dist)),
        )
        dash_points.append(
            (int(x1 + unit_x * dash_end), int(y1 + unit_y * dash_end)),
        )
        current_dist += dash_length + gap_length
    return dash_points
def draw_dashed_rectangle(
    img,
    bbox: BoundingBox,
    color,
    thickness=1,
    dash_length=10,
    gap_length=5,
    pad=25,
):
    """Draw a dashed rectangle for *bbox* onto *img*, in place.

    Args:
        img: BGR image (numpy array) to draw on.
        bbox: Box in the unpadded image's coordinate system.
        color: BGR color tuple for the dashes.
        thickness: Dash line thickness in pixels.
        dash_length: Length of each dash in pixels.
        gap_length: Gap between dashes in pixels.
        pad: Border padding already applied to *img* (see
            annotate_bounding_boxes); bbox coordinates are shifted by this
            amount. Previously a magic "+ 25" repeated on every coordinate.
    """
    left, right = bbox.left + pad, bbox.right + pad
    top, bottom = bbox.top + pad, bbox.bottom + pad
    # Walk the four corners clockwise; each consecutive pair is one side.
    corners = [(left, top), (right, top), (right, bottom), (left, bottom)]
    all_points = []
    for i, side_start in enumerate(corners):
        side_end = corners[(i + 1) % 4]
        all_points.extend(
            calculate_dash_points(side_start, side_end, dash_length, gap_length),
        )
    # Draw every dash with a single polylines call.
    if all_points:
        segments = np.array(all_points).reshape((-1, 2, 2))
        cv2.polylines(img, segments, False, color, thickness)
# @time_it(name='Annotate bounding box')
def annotate_bounding_box(image: np.ndarray, bbox: BoundingBox) -> None:
    """Draw *bbox* as a dashed rectangle plus a translucent label patch, in place.

    Mutates *image* — a BGR numpy array that has already been padded by 25px
    on every side (the "+ 25" offsets below shift bbox coordinates into that
    padded frame; see annotate_bounding_boxes).

    NOTE: the original annotation said `image: bytes`, but every use below
    (`.shape`, slicing, in-place assignment) requires a decoded ndarray.
    """
    # Draw dashed bounding box
    draw_dashed_rectangle(
        image,
        bbox,
        color=(0, 0, 255),  # red in BGR
        thickness=1,
        dash_length=10,
        gap_length=5,
    )
    # Prepare label
    font_scale = 0.4 * 4  # Increased by 4x for the larger patch
    font = cv2.FONT_HERSHEY_SIMPLEX
    thickness = 3  # Increased thickness for the larger patch
    # Get text size for the larger patch
    (label_width, label_height), _ = cv2.getTextSize(
        bbox.label,
        font,
        font_scale,
        thickness,
    )
    # Create a larger patch (4x); 4 channels = BGRA so we can carry alpha.
    large_label_patch = np.zeros(
        (label_height + 20, label_width + 20, 4),
        dtype=np.uint8,
    )
    large_label_patch[:, :, 0:3] = (0, 0, 255)  # BGR color format: Red background
    large_label_patch[:, :, 3] = 128  # Alpha channel: 50% opacity (128/255 = 0.5)
    # Draw text on the larger patch
    cv2.putText(
        large_label_patch,
        bbox.label,
        (8, label_height + 8),  # Adjusted position for the larger patch
        font,
        font_scale,
        (255, 255, 255, 128),  # White text, 50% opaque (128/255 = 0.5)
        thickness,
    )
    # Scale down the patch to improve anti-aliasing (supersampling: render
    # at 4x then shrink with INTER_AREA). The "+ 5" appears to be margin —
    # TODO confirm it was tuned rather than derived.
    label_patch = cv2.resize(
        large_label_patch,
        (label_width // 4 + 5, label_height // 4 + 5),
        interpolation=cv2.INTER_AREA,
    )
    # Calculate position for top-left alignment: label sits just above the
    # box's top-left corner, clamped to the image bounds.
    offset = 2  # Small offset to prevent touching the bounding box edge
    x = min(image.shape[1], max(0, int(bbox.left + 25) - offset))
    y = min(image.shape[0], max(0, int(bbox.top + 25) - label_patch.shape[0] - offset))
    # Ensure we're not out of bounds; crop the patch to what fits.
    x_end = min(image.shape[1], x + label_patch.shape[1])
    y_end = min(image.shape[0], y + label_patch.shape[0])
    label_patch = label_patch[: (y_end - y), : (x_end - x)]
    # Create a mask for the label patch (per-pixel alpha, broadcast to BGR).
    alpha_mask = label_patch[:, :, 3] / 255.0
    alpha_mask = np.repeat(alpha_mask[:, :, np.newaxis], 3, axis=2)
    # Blend the label patch with the image (standard alpha compositing).
    image_section = image[y:y_end, x:x_end]
    blended = (1 - alpha_mask) * image_section + alpha_mask * label_patch[:, :, 0:3]
    image[y:y_end, x:x_end] = blended.astype(np.uint8)
def annotate_bounding_boxes(image: bytes, bounding_boxes: list[BoundingBox]) -> bytes:
    """Render dashed boxes and labels onto an encoded image.

    Args:
        image: Encoded image bytes (any format cv2.imdecode understands).
        bounding_boxes: Boxes in the original (unpadded) image's coordinates.

    Returns:
        The annotated image re-encoded as JPEG bytes. The output is 50px
        wider and taller than the input: a white 25px border is added so
        labels drawn above boxes near the top edge remain visible.

    Raises:
        ValueError: If the input bytes cannot be decoded, or the result
            cannot be encoded as JPEG.
    """
    # Decode the raw bytes into a BGR ndarray.
    nparr = np.frombuffer(image, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    if img is None:
        # imdecode signals failure by returning None rather than raising;
        # fail loudly here instead of crashing inside copyMakeBorder.
        raise ValueError("Could not decode input image bytes")
    padded_img = cv2.copyMakeBorder(
        img,
        top=25,  # Value chosen based on label size
        bottom=25,  # Value chosen based on label size
        left=25,  # Value chosen based on label size
        right=25,  # Value chosen based on label size
        borderType=cv2.BORDER_CONSTANT,
        value=(255, 255, 255),
    )
    for bounding_box in bounding_boxes:
        # Annotate the image in place with the bounding box and the bounding box label
        annotate_bounding_box(padded_img, bounding_box)
    ok, buffer = cv2.imencode(".jpeg", padded_img)
    if not ok:
        # imencode returns a success flag the original silently discarded.
        raise ValueError("Could not encode annotated image as JPEG")
    return buffer.tobytes()