Spaces:
Running
Running
| import math | |
| from typing import Any | |
| import cv2 | |
| import numpy as np | |
| from pydantic import BaseModel, Field, field_validator | |
class Point(BaseModel):
    """An integer 2-D pixel coordinate that also behaves like an (x, y) pair."""

    x: int
    y: int

    def __iter__(self):
        # Supports `px, py = point` unpacking.
        yield self.x
        yield self.y

    def __getitem__(self, index) -> int:
        # Tuple-style access: index 0 -> x, index 1 -> y.
        return (self.x, self.y)[index]

    def __tuple__(self) -> tuple[int, int]:
        # NOTE: not a real Python protocol dunder (tuple() does not call it);
        # kept because callers may invoke it directly.
        return (self.x, self.y)

    def __repr__(self) -> str:
        return f"Point(x={self.x}, y={self.y})"
class BoundingBox(BaseModel):
    """A labeled axis-aligned box; coordinates are pixel ints."""

    label: str = Field(..., description="The label that's given for this bounding box")
    left: int = Field(..., description="Left coordinate of the bounding box")
    right: int = Field(..., description="Right coordinate of the bounding box")
    top: int = Field(..., description="Top coordinate of the bounding box")
    bottom: int = Field(..., description="Bottom coordinate of the bounding box")

    # BUG FIX: these took `cls` but had no decorators, so they were dead
    # instance methods and the `field_validator` imported at the top of the
    # file was unused. Wired up as "before" validators so float coordinates
    # are coerced to ints: min edges are floored and max edges are ceiled,
    # which always expands (never shrinks) the box.
    # TODO(review): confirm floor-min/ceil-max is the intended rounding split.
    @field_validator("left", "top", mode="before")
    @classmethod
    def round_down(cls, v):
        """Round a possibly-float coordinate down to the nearest int."""
        return math.floor(float(v))

    @field_validator("right", "bottom", mode="before")
    @classmethod
    def round_up(cls, v):
        """Round a possibly-float coordinate up to the nearest int."""
        return math.ceil(float(v))
class POI(BaseModel):
    """A point of interest: metadata plus its location on the image."""

    # Arbitrary metadata describing this point of interest.
    info: dict[str, Any]
    # Pixel coordinates of the element's center.
    element_centroid: Point
    # Box enclosing the element (see BoundingBox).
    bounding_box: BoundingBox
def calculate_dash_points(start, end, dash_length, gap_length):
    """Return the dash endpoints for a dashed line from `start` to `end`.

    The result is a flat list of (x, y) int tuples in which consecutive
    pairs (indices 2i and 2i+1) are the start and end of one dash.
    Returns [] for a zero-length segment.
    """
    (sx, sy), (ex, ey) = start, end
    delta_x = ex - sx
    delta_y = ey - sy
    length = np.sqrt(delta_x * delta_x + delta_y * delta_y)
    if length == 0:
        # Degenerate segment: nothing to draw.
        return []
    # Unit direction vector along the segment.
    ux = delta_x / length
    uy = delta_y / length
    points = []
    offset = 0
    while offset < length:
        # Clamp the final dash so it never overshoots the segment end.
        dash_end = min(offset + dash_length, length)
        points.append((int(sx + ux * offset), int(sy + uy * offset)))
        points.append((int(sx + ux * dash_end), int(sy + uy * dash_end)))
        offset += dash_length + gap_length
    return points
def draw_dashed_rectangle(
    img,
    bbox: BoundingBox,
    color,
    thickness=1,
    dash_length=10,
    gap_length=5,
    pad=25,
):
    """Draw `bbox` as a dashed rectangle on `img` (modified in place).

    Args:
        img: BGR image array to draw on.
        bbox: Box coordinates relative to the unpadded image.
        color: BGR color tuple passed to cv2.polylines.
        thickness: Line thickness in pixels.
        dash_length: Length of each dash in pixels.
        gap_length: Gap between dashes in pixels.
        pad: Offset added to every coordinate; must match the border padding
            applied to `img` (25 px in annotate_bounding_boxes). Previously
            this was a hard-coded `+ 25`, coupling the function to that one
            padding value; the default preserves the old behavior.
    """
    left, right = bbox.left + pad, bbox.right + pad
    top, bottom = bbox.top + pad, bbox.bottom + pad

    # The four edges, in the same order as the original copy-pasted calls:
    # top, right, bottom, left.
    edges = [
        ((left, top), (right, top)),
        ((right, top), (right, bottom)),
        ((right, bottom), (left, bottom)),
        ((left, bottom), (left, top)),
    ]

    all_points = []
    for seg_start, seg_end in edges:
        all_points.extend(
            calculate_dash_points(seg_start, seg_end, dash_length, gap_length),
        )

    # Draw every dash with a single polylines call instead of one line() per dash.
    if all_points:
        segments = np.array(all_points).reshape((-1, 2, 2))
        cv2.polylines(img, segments, False, color, thickness)
# @time_it(name='Annotate bounding box')
def annotate_bounding_box(image: np.ndarray, bbox: BoundingBox) -> None:
    """Draw `bbox` as a dashed red rectangle plus a translucent label patch.

    Modifies `image` in place. `image` must be a BGR uint8 ndarray that has
    already been padded by 25 px on every side (see annotate_bounding_boxes);
    the hard-coded `+ 25` offsets below assume that padding.

    NOTE(review): the original annotation said `bytes`, but the code reads
    `image.shape` and slices/assigns into it, so it must be an ndarray.
    """
    # Draw dashed bounding box
    draw_dashed_rectangle(
        image,
        bbox,
        color=(0, 0, 255),
        thickness=1,
        dash_length=10,
        gap_length=5,
    )
    # Prepare label: render at 4x scale, then shrink, to get anti-aliased text.
    font_scale = 0.4 * 4  # Increased by 4x for the larger patch
    font = cv2.FONT_HERSHEY_SIMPLEX
    thickness = 3  # Increased thickness for the larger patch
    # Get text size for the larger patch
    (label_width, label_height), _ = cv2.getTextSize(
        bbox.label,
        font,
        font_scale,
        thickness,
    )
    # Create a larger patch (4x), BGRA so we get an alpha channel to blend with.
    large_label_patch = np.zeros(
        (label_height + 20, label_width + 20, 4),
        dtype=np.uint8,
    )
    large_label_patch[:, :, 0:3] = (0, 0, 255)  # BGR color format: Red background
    large_label_patch[:, :, 3] = 128  # Alpha channel: 50% opacity (128/255 = 0.5)
    # Draw text on the larger patch
    cv2.putText(
        large_label_patch,
        bbox.label,
        (8, label_height + 8),  # Adjusted position for the larger patch
        font,
        font_scale,
        (255, 255, 255, 128),  # White text, 50% opaque (128/255 = 0.5)
        thickness,
    )
    # Scale down the patch to improve anti-aliasing
    label_patch = cv2.resize(
        large_label_patch,
        (label_width // 4 + 5, label_height // 4 + 5),
        interpolation=cv2.INTER_AREA,
    )
    # Calculate position: patch sits just above the box's top-left corner
    # (the `+ 25` matches the image padding), clamped to the image bounds.
    offset = 2  # Small offset to prevent touching the bounding box edge
    x = min(image.shape[1], max(0, int(bbox.left + 25) - offset))
    y = min(image.shape[0], max(0, int(bbox.top + 25) - label_patch.shape[0] - offset))
    # Ensure we're not out of bounds: crop the patch to what fits on the image.
    x_end = min(image.shape[1], x + label_patch.shape[1])
    y_end = min(image.shape[0], y + label_patch.shape[0])
    label_patch = label_patch[: (y_end - y), : (x_end - x)]
    # Create a mask for the label patch (alpha scaled to [0, 1], repeated per BGR channel).
    alpha_mask = label_patch[:, :, 3] / 255.0
    alpha_mask = np.repeat(alpha_mask[:, :, np.newaxis], 3, axis=2)
    # Blend the label patch with the image (standard alpha compositing).
    image_section = image[y:y_end, x:x_end]
    blended = (1 - alpha_mask) * image_section + alpha_mask * label_patch[:, :, 0:3]
    image[y:y_end, x:x_end] = blended.astype(np.uint8)
def annotate_bounding_boxes(image: bytes, bounding_boxes: list[BoundingBox]) -> bytes:
    """Decode `image`, pad it, draw every box with its label, re-encode as JPEG.

    Args:
        image: Encoded image bytes (any format cv2.imdecode understands).
        bounding_boxes: Boxes to draw; coordinates are relative to the
            unpadded image.

    Returns:
        JPEG-encoded bytes of the annotated, padded image.

    Raises:
        ValueError: If the input bytes cannot be decoded, or the result
            cannot be re-encoded.
    """
    # Read the image
    nparr = np.frombuffer(image, np.uint8)
    # Decode the image. BUG FIX: imdecode signals failure by returning None
    # (it does not raise), which previously crashed later inside
    # copyMakeBorder with a confusing error.
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    if img is None:
        raise ValueError("Could not decode input bytes as an image")
    padded_img = cv2.copyMakeBorder(
        img,
        top=25,  # Value chosen based on label size
        bottom=25,  # Value chosen based on label size
        left=25,  # Value chosen based on label size
        right=25,  # Value chosen based on label size
        borderType=cv2.BORDER_CONSTANT,
        value=(255, 255, 255),
    )
    for bounding_box in bounding_boxes:
        # Annotate the image in place with the bounding box and the bounding box label
        annotate_bounding_box(padded_img, bounding_box)
    # BUG FIX: the encode success flag was discarded; a failed encode would
    # silently return garbage bytes.
    ok, buffer = cv2.imencode(".jpeg", padded_img)
    if not ok:
        raise ValueError("Failed to encode annotated image as JPEG")
    return buffer.tobytes()