# NOTE: extraction artifact preserved below as a comment so the file parses:
#   Spaces: / Running / Running
import math | |
from typing import Any | |
import cv2 | |
import numpy as np | |
from pydantic import BaseModel, Field, field_validator | |
class Point(BaseModel):
    """An integer 2-D point supporting iteration, indexing, and unpacking."""

    # Pixel coordinates.
    x: int
    y: int

    def __iter__(self):
        # Makes `x, y = point` and `tuple(point)` work.
        # NOTE(review): this overrides pydantic BaseModel's own __iter__
        # (which yields (field_name, value) pairs) — presumably intentional.
        return iter((self.x, self.y))

    def __getitem__(self, index) -> int:
        # point[0] -> x, point[1] -> y; delegates to a tuple, so negative
        # indices and slices follow tuple semantics.
        return (self.x, self.y)[index]

    def __tuple__(self) -> tuple[int, int]:
        # NOTE(review): __tuple__ is not a Python protocol method — the
        # tuple() builtin uses __iter__ above. Only reachable via an
        # explicit point.__tuple__() call; kept for interface compatibility.
        return (self.x, self.y)

    def __repr__(self) -> str:
        return f"Point(x={self.x}, y={self.y})"
class BoundingBox(BaseModel):
    """A labeled axis-aligned bounding box in pixel coordinates.

    Incoming coordinate values are coerced so the box only ever grows:
    left/top are floored and right/bottom are ceiled, which keeps the
    full labeled region inside the integer box.
    """

    label: str = Field(..., description="The label that's given for this bounding box")
    left: int = Field(..., description="Left coordinate of the bounding box")
    right: int = Field(..., description="Right coordinate of the bounding box")
    top: int = Field(..., description="Top coordinate of the bounding box")
    bottom: int = Field(..., description="Bottom coordinate of the bounding box")

    # `field_validator` is imported at the top of the file and these two
    # methods carry the (cls, v) validator signature — the decorators below
    # restore the intended pre-validation rounding so float inputs are
    # accepted and rounded outward.
    @field_validator("left", "top", mode="before")
    @classmethod
    def round_down(cls, v):
        return math.floor(float(v))

    @field_validator("right", "bottom", mode="before")
    @classmethod
    def round_up(cls, v):
        return math.ceil(float(v))
class POI(BaseModel): | |
info: dict[str, Any] | |
element_centroid: Point | |
bounding_box: BoundingBox | |
def calculate_dash_points(start, end, dash_length, gap_length):
    """Return endpoints of the dash segments along the line start->end.

    The result is a flat list of (x, y) integer tuples; consecutive pairs
    are the start/end of one dash. Coincident endpoints yield [].
    """
    (sx, sy), (ex, ey) = start, end
    delta_x, delta_y = ex - sx, ey - sy
    length = math.hypot(delta_x, delta_y)
    if length == 0:
        return []

    # Unit direction vector along the line.
    ux, uy = delta_x / length, delta_y / length

    points = []
    pos = 0
    while pos < length:
        # Each dash runs from `pos` to `pos + dash_length`, clamped at the
        # far endpoint so the final dash never overshoots.
        stop = min(pos + dash_length, length)
        points.append((int(sx + ux * pos), int(sy + uy * pos)))
        points.append((int(sx + ux * stop), int(sy + uy * stop)))
        pos += dash_length + gap_length
    return points
def draw_dashed_rectangle(
    img,
    bbox: BoundingBox,
    color,
    thickness=1,
    dash_length=10,
    gap_length=5,
    padding=25,
):
    """Draw a dashed rectangle for `bbox` onto `img` in place.

    Args:
        img: BGR image array to draw on (mutated in place).
        bbox: box to outline.
        color: BGR color tuple for the dashes.
        thickness: line thickness in pixels.
        dash_length / gap_length: dash pattern in pixels.
        padding: offset added to every coordinate; defaults to 25 to match
            the border that annotate_bounding_boxes() adds around the image
            (previously a hard-coded constant).
    """
    left, right = bbox.left + padding, bbox.right + padding
    top, bottom = bbox.top + padding, bbox.bottom + padding

    # Walk the four edges clockwise and collect dash segment endpoints.
    edges = (
        ((left, top), (right, top)),      # top
        ((right, top), (right, bottom)),  # right
        ((right, bottom), (left, bottom)),  # bottom
        ((left, bottom), (left, top)),    # left
    )
    all_points = []
    for edge_start, edge_end in edges:
        all_points.extend(
            calculate_dash_points(edge_start, edge_end, dash_length, gap_length),
        )

    if all_points:
        # cv2.polylines requires int32 point arrays; np.array of Python ints
        # defaults to int64 on most platforms, which OpenCV rejects.
        segments = np.array(all_points, dtype=np.int32).reshape((-1, 2, 2))
        cv2.polylines(img, segments, False, color, thickness)
# @time_it(name='Annotate bounding box')
def annotate_bounding_box(image: np.ndarray, bbox: BoundingBox) -> None:
    """Draw a dashed red box and a semi-transparent label patch onto `image`.

    Mutates `image` in place and returns None. `image` is a BGR uint8
    array that has already been padded by 25 px on every side (see
    annotate_bounding_boxes) — the hard-coded `+ 25` offsets below assume
    that padding.
    """
    # Draw dashed bounding box
    draw_dashed_rectangle(
        image,
        bbox,
        color=(0, 0, 255),
        thickness=1,
        dash_length=10,
        gap_length=5,
    )
    # Prepare label: render at 4x size, then downscale for anti-aliasing.
    font_scale = 0.4 * 4  # Increased by 4x for the larger patch
    font = cv2.FONT_HERSHEY_SIMPLEX
    thickness = 3  # Increased thickness for the larger patch
    # Get text size for the larger patch
    (label_width, label_height), _ = cv2.getTextSize(
        bbox.label,
        font,
        font_scale,
        thickness,
    )
    # Create a larger patch (4x). BGRA so the patch carries its own alpha.
    large_label_patch = np.zeros(
        (label_height + 20, label_width + 20, 4),
        dtype=np.uint8,
    )
    large_label_patch[:, :, 0:3] = (0, 0, 255)  # BGR color format: Red background
    large_label_patch[:, :, 3] = 128  # Alpha channel: 50% opacity (128/255 = 0.5)
    # Draw text on the larger patch
    cv2.putText(
        large_label_patch,
        bbox.label,
        (8, label_height + 8),  # Adjusted position for the larger patch
        font,
        font_scale,
        (255, 255, 255, 128),  # White text, 50% opaque (128/255 = 0.5)
        thickness,
    )
    # Scale down the patch to improve anti-aliasing (+5 leaves a small margin).
    label_patch = cv2.resize(
        large_label_patch,
        (label_width // 4 + 5, label_height // 4 + 5),
        interpolation=cv2.INTER_AREA,
    )
    # Calculate position for top-left alignment: patch sits just above the
    # box's padded top-left corner, clamped into the image bounds.
    offset = 2  # Small offset to prevent touching the bounding box edge
    x = min(image.shape[1], max(0, int(bbox.left + 25) - offset))
    y = min(image.shape[0], max(0, int(bbox.top + 25) - label_patch.shape[0] - offset))
    # Ensure we're not out of bounds: crop the patch to what actually fits.
    x_end = min(image.shape[1], x + label_patch.shape[1])
    y_end = min(image.shape[0], y + label_patch.shape[0])
    label_patch = label_patch[: (y_end - y), : (x_end - x)]
    # Create a per-pixel alpha mask from the patch, broadcast to 3 channels.
    alpha_mask = label_patch[:, :, 3] / 255.0
    alpha_mask = np.repeat(alpha_mask[:, :, np.newaxis], 3, axis=2)
    # Blend the label patch with the image (standard alpha compositing),
    # then write the blended region back in place.
    image_section = image[y:y_end, x:x_end]
    blended = (1 - alpha_mask) * image_section + alpha_mask * label_patch[:, :, 0:3]
    image[y:y_end, x:x_end] = blended.astype(np.uint8)
def annotate_bounding_boxes(image: bytes, bounding_boxes: list[BoundingBox]) -> bytes:
    """Decode `image`, draw every bounding box with its label, re-encode as JPEG.

    Args:
        image: encoded image bytes (any format cv2.imdecode supports).
        bounding_boxes: boxes to draw; coordinates are relative to the
            original (unpadded) image.

    Returns:
        JPEG-encoded bytes of the annotated, 25-px-padded image.

    Raises:
        ValueError: if the input bytes cannot be decoded, or the result
            cannot be encoded as JPEG.
    """
    # Decode the raw bytes into a BGR image array.
    nparr = np.frombuffer(image, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    if img is None:
        # imdecode signals failure by returning None rather than raising.
        raise ValueError("Could not decode input image bytes")
    # White border so labels drawn above/left of edge boxes stay visible;
    # 25 px matches the coordinate offsets used when annotating.
    padded_img = cv2.copyMakeBorder(
        img,
        top=25,  # Value chosen based on label size
        bottom=25,  # Value chosen based on label size
        left=25,  # Value chosen based on label size
        right=25,  # Value chosen based on label size
        borderType=cv2.BORDER_CONSTANT,
        value=(255, 255, 255),
    )
    for bounding_box in bounding_boxes:
        # Annotate the image in place with the bounding box and its label.
        annotate_bounding_box(padded_img, bounding_box)
    success, buffer = cv2.imencode(".jpeg", padded_img)
    if not success:
        # imencode reports failure via its boolean flag, not an exception.
        raise ValueError("Could not encode annotated image as JPEG")
    return buffer.tobytes()