Spaces:
Sleeping
Sleeping
| import base64 | |
| import os | |
| from io import BytesIO | |
| import cv2 | |
| import gradio as gr | |
| import numpy as np | |
| import pyrebase | |
| import requests | |
| from openai import OpenAI | |
| from PIL import Image, ImageDraw, ImageFont | |
| from ultralytics import YOLO | |
| from prompts import remove_unwanted_prompt | |
# Shared YOLO detector, created once when the module is imported from the
# "yolo11n.pt" weights. find_persons_center and create_layouts call it with
# classes=[0] to detect persons.
model = YOLO("yolo11n.pt")
def get_middle_thumbnail(input_image: "Image.Image", grid_size=(10, 10), padding=3):
    """
    Extract the middle thumbnail from a sprite sheet and strip its padding.

    Thumbnails are indexed row by row; the "middle" one is the thumbnail at
    index ``(columns * rows) // 2`` in that order.

    Args:
        input_image: PIL Image holding the whole sprite sheet
        grid_size: Tuple of (columns, rows)
        padding: Number of padding pixels on each side of a thumbnail (default 3)
    Returns:
        PIL.Image: The middle thumbnail image with padding removed
    Raises:
        ValueError: If the grid is empty or the padding leaves no pixels.
    """
    columns, rows = grid_size
    if columns < 1 or rows < 1:
        raise ValueError(f"grid_size must be at least (1, 1), got {grid_size!r}")
    if padding < 0:
        raise ValueError(f"padding must not be negative, got {padding!r}")

    # Size of one grid cell (thumbnail plus its surrounding padding).
    sheet_width, sheet_height = input_image.size
    cell_width = sheet_width // columns
    cell_height = sheet_height // rows

    # Size of the actual picture inside the cell.
    thumb_width = cell_width - 2 * padding
    thumb_height = cell_height - 2 * padding
    if thumb_width <= 0 or thumb_height <= 0:
        raise ValueError(
            "padding leaves no pixels: each cell is "
            f"{cell_width}x{cell_height} but padding is {padding}"
        )

    # Row/column of the middle thumbnail in row-major order.
    middle_row, middle_col = divmod((columns * rows) // 2, columns)

    # Skip the leading padding; the trailing padding is excluded because the
    # crop box is only thumb_width x thumb_height.
    left = middle_col * cell_width + padding
    top = middle_row * cell_height + padding
    return input_image.crop((left, top, left + thumb_width, top + thumb_height))
def encode_image_to_base64(image: "Image.Image", format: str = "JPEG") -> str:
    """
    Convert a PIL image to a base64 string.

    Args:
        image: PIL Image object
        format: Image format to use for encoding (default: JPEG)
    Returns:
        Base64 encoded string of the image
    """
    # The JPEG writer rejects modes with alpha or a palette (RGBA, LA, P, ...),
    # so flatten those to RGB instead of failing inside save().
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if format.upper() == "JPEG" and image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format=format)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
def add_top_numbers(
    input_image,
    num_divisions=20,
    margin=90,
    font_size=70,
    dot_spacing=20,
):
    """
    Add numbered divisions across the top and bottom of any image with dotted vertical lines.

    The input image is not modified; a new, taller RGB image is returned with
    a white margin above and below the original.

    Args:
        input_image (Image): PIL Image
        num_divisions (int): Number of divisions to create
        margin (int): Size of margin in pixels for numbers
        font_size (int): Font size for numbers
        dot_spacing (int): Spacing between dots in pixels
    Returns:
        Image: New PIL Image containing the numbered, dotted overlay
    Raises:
        ValueError: If num_divisions is below 1 or dot_spacing is not positive.
    """
    # Validate first: a zero division count would divide by zero and a
    # non-positive dot spacing would make the dot loop below never terminate.
    if num_divisions < 1:
        raise ValueError(f"num_divisions must be at least 1, got {num_divisions!r}")
    if dot_spacing <= 0:
        raise ValueError(f"dot_spacing must be positive, got {dot_spacing!r}")

    # New canvas with extra white space for the numbers on top and bottom.
    canvas_width = input_image.width
    canvas_height = input_image.height + 2 * margin
    canvas = Image.new("RGB", (canvas_width, canvas_height), "white")
    canvas.paste(input_image, (0, margin))

    draw = ImageDraw.Draw(canvas)
    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        print("Using default font")
        font = ImageFont.load_default(size=font_size)

    # The dot rows are the same for every division, so compute them once.
    dot_rows = []
    current_y = margin
    while current_y < canvas_height - margin:
        dot_rows.append(current_y)
        current_y += dot_spacing

    dot_radius = 3
    division_width = input_image.width / num_divisions
    for index in range(num_divisions):
        # Horizontal centre of this division.
        x = index * division_width + division_width / 2
        number = str(index + 1)

        # Same number in the top and in the bottom margin.
        draw.text((x, margin // 2), number, fill="black", font=font, anchor="mm")
        draw.text(
            (x, canvas_height - margin // 2),
            number,
            fill="black",
            font=font,
            anchor="mm",
        )

        # Dotted vertical line through the division centre. ellipse() takes a
        # bounding box, so each dot is centred exactly on (x, y).
        for y in dot_rows:
            draw.ellipse(
                (x - dot_radius, y - dot_radius, x + dot_radius, y + dot_radius),
                fill="black",
            )
    return canvas
def _parse_json_reply(reply):
    """Return the dict encoded by the outermost {...} span of reply, or None."""
    import ast
    import json

    text = reply or ""
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end <= start:
        return None
    snippet = text[start : end + 1]
    print(snippet)
    # SECURITY: the reply is model output, i.e. untrusted text. The original
    # code ran eval() on it; it is parsed as data instead. literal_eval is the
    # fallback for Python-style dicts (single quotes) and only accepts literals.
    try:
        parsed = json.loads(snippet)
    except json.JSONDecodeError:
        try:
            parsed = ast.literal_eval(snippet)
        except (ValueError, SyntaxError, TypeError) as exc:
            print(exc)
            return None
    return parsed if isinstance(parsed, dict) else None


def analyze_image(numbered_input_image: "Image.Image", prompt, input_image, ct):
    """
    Ask gpt-4o which numbered divisions bound the subject of an image.

    Two chat completions are made: the first sends the prompt together with
    the numbered image, the second asks for the answer as JSON with the keys
    left_row, right_row and num_of_speakers.

    Args:
        numbered_input_image (Image): PIL Image with division numbers drawn on it
        prompt (str): The prompt/question about the image
        input_image (Image): input image without numbers (currently unused)
        ct: currently unused; kept so existing callers keep working
    Returns:
        tuple: (left_row, right_row, num_of_speakers) as given by the model, or
        the fallback (0, 20, 1) when the reply contains no usable JSON object.
    """
    # Always three values, so callers can unpack the fallback the same way as
    # a real answer.
    fallback = (0, 20, 1)

    client = OpenAI()
    base64_image = encode_image_to_base64(numbered_input_image, format="JPEG")
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                },
            ],
        }
    ]
    first_response = client.chat.completions.create(
        model="gpt-4o", messages=messages, max_tokens=300
    )

    # Feed the free-text answer back and ask for a machine-readable version.
    messages.extend(
        [
            {
                "role": "assistant",
                "content": first_response.choices[0].message.content,
            },
            {
                "role": "user",
                "content": "please return the response in the json with keys left_row, right_row, and num_of_speakers",
            },
        ],
    )
    reply = (
        client.chat.completions.create(model="gpt-4o", messages=messages)
        .choices[0]
        .message.content
    )

    parsed = _parse_json_reply(reply)
    if parsed is None:
        return fallback
    try:
        return (
            parsed["left_row"],
            parsed["right_row"],
            parsed["num_of_speakers"],
        )
    except KeyError as exc:
        print(exc)
        return fallback
def get_sprite_firebase(cid, rsid, uid):
    """
    Read one sprite-link entry from the Firebase realtime database.

    The Firebase client configuration and the account id are taken from
    environment variables. The entry is looked up under
    ``<ROLL_ACCOUNT>/collab_sprite_link_handler/<uid>/<cid>/<rsid>``.

    Args:
        cid: third path component after the handler name
        rsid: last path component
        uid: first path component after the handler name
    Returns:
        Whatever value the database holds at that path.
    """
    env_names = {
        "apiKey": "FIREBASE_API_KEY",
        "authDomain": "FIREBASE_AUTH_DOMAIN",
        "databaseURL": "FIREBASE_DATABASE_URL",
        "projectId": "FIREBASE_PROJECT_ID",
        "storageBucket": "FIREBASE_STORAGE_BUCKET",
        "messagingSenderId": "FIREBASE_MESSAGING_SENDER_ID",
        "appId": "FIREBASE_APP_ID",
        "measurementId": "FIREBASE_MEASUREMENT_ID",
    }
    # str() keeps every value a string, so an unset variable becomes "None".
    settings = {key: str(os.getenv(env)) for key, env in env_names.items()}

    database = pyrebase.initialize_app(settings).database()

    COLLAB_EDIT_LINK = "collab_sprite_link_handler"
    segments = (os.getenv("ROLL_ACCOUNT"), COLLAB_EDIT_LINK, uid, cid, rsid)
    path = "/".join(f"{segment}" for segment in segments)

    snapshot = database.child(path).get()
    return snapshot.val()
def find_persons_center(image, num_of_speakers=1):
    """
    Find the horizontal center of the largest num_of_speakers persons in the image.

    Persons are detected with the module-level YOLO model. When several are
    found, only the largest boxes (by area) are kept and merged into one
    bounding box whose horizontal center is returned.

    Args:
        image: CV2/numpy array image
        num_of_speakers: Number of speakers to consider (default: 1). Values
            that are not usable as a count (zero, negative, not a number)
            are treated as 1.
    Returns:
        int: x-coordinate of the center point of all considered persons, or
        the horizontal center of the image when nobody is detected
    """
    # Detect persons (class 0 in COCO dataset)
    detections = model(image, classes=[0], conf=0.6)
    if not detections or len(detections[0].boxes) == 0:
        # Nobody found: fall back to the middle of the frame.
        return image.shape[1] // 2

    boxes = detections[0].boxes.xyxy.cpu().numpy()
    print(f"Detected {len(boxes)} persons in the image")

    if len(boxes) == 1:
        x1, _, x2, _ = boxes[0]
        center_x = int((x1 + x2) // 2)
        print(f"Single person detected at center x: {center_x}")
        return center_x

    # The count may come from parsed model output, so make sure it is an
    # integer of at least 1; an empty selection would make min()/max() fail.
    try:
        requested = int(num_of_speakers)
    except (TypeError, ValueError):
        requested = 1
    num_boxes_to_use = max(1, min(requested, len(boxes)))

    # Largest boxes first; ties keep their detection order.
    largest_boxes = sorted(
        boxes,
        key=lambda box: (box[2] - box[0]) * (box[3] - box[1]),
        reverse=True,
    )[:num_boxes_to_use]

    # Horizontal extent of the merged bounding box.
    left_x = min(box[0] for box in largest_boxes)
    right_x = max(box[2] for box in largest_boxes)
    merged_center_x = int((left_x + right_x) // 2)
    print(
        f"{num_boxes_to_use} largest persons merged bounding box center x: {merged_center_x}"
    )
    print(f"Merged bounds: left={left_x}, right={right_x}")
    return merged_center_x
def _layout_clamped_start(available, wanted, start):
    """Clamp start so a wanted-long window stays inside [0, available]."""
    return int(max(0, min(available - wanted, start)))


def _layout_tag(segment, text):
    """Draw the small white segment label in the top-left corner, in place."""
    cv2.putText(
        segment,
        text,
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )
    return segment


def _layout_crop_cols(img, wanted_width, center_x, text=None):
    """Copy a full-height column band of wanted_width centred on center_x."""
    total = img.shape[1]
    x0 = _layout_clamped_start(total, wanted_width, center_x - wanted_width // 2)
    x1 = int(min(total, x0 + wanted_width))
    segment = img[:, x0:x1].copy()
    return _layout_tag(segment, text) if text else segment


def _layout_crop_rows(img, wanted_height, start_y, text=None):
    """Copy a full-width row band of wanted_height starting near start_y."""
    total = img.shape[0]
    y0 = _layout_clamped_start(total, wanted_height, start_y)
    y1 = int(min(total, y0 + wanted_height))
    segment = img[y0:y1, :].copy()
    return _layout_tag(segment, text) if text else segment


def _layout_merge_largest_boxes(boxes, num_of_speakers):
    """Merge the largest num_of_speakers xyxy boxes into (left, top, right, bottom)."""
    # The count may come from parsed model output; always keep at least one
    # box so min()/max() below never see an empty selection.
    try:
        requested = int(num_of_speakers)
    except (TypeError, ValueError):
        requested = 1
    keep = max(1, min(requested, len(boxes)))
    largest = sorted(
        boxes,
        key=lambda box: (box[2] - box[0]) * (box[3] - box[1]),
        reverse=True,
    )[:keep]
    return (
        min(box[0] for box in largest),
        min(box[1] for box in largest),
        max(box[2] for box in largest),
        max(box[3] for box in largest),
    )


def _layout_stack(segments, horizontal=True):
    """Join segments side by side (or top to bottom) on a black canvas."""
    if not segments:
        return None
    if horizontal:
        canvas = np.zeros(
            (
                max(segment.shape[0] for segment in segments),
                sum(segment.shape[1] for segment in segments),
                3,
            ),
            dtype=np.uint8,
        )
        x_offset = 0
        for segment in segments:
            h, w = segment.shape[:2]
            canvas[:h, x_offset : x_offset + w] = segment
            x_offset += w
    else:
        canvas = np.zeros(
            (
                sum(segment.shape[0] for segment in segments),
                max(segment.shape[1] for segment in segments),
                3,
            ),
            dtype=np.uint8,
        )
        y_offset = 0
        for segment in segments:
            h, w = segment.shape[:2]
            canvas[y_offset : y_offset + h, :w] = segment
            y_offset += h
    return canvas


def _layout_banner(img, text):
    """Draw text on a black box in the top-left corner of img, in place."""
    if img is None:
        return None
    font = cv2.FONT_HERSHEY_SIMPLEX
    (text_width, text_height), _ = cv2.getTextSize(
        text, fontFace=font, fontScale=1.0, thickness=2
    )
    cv2.rectangle(
        img,
        (10, 10),
        (10 + text_width + 10, 10 + text_height + 10),
        (0, 0, 0),
        -1,
    )
    cv2.putText(
        img,
        text,
        (15, 15 + text_height),
        fontFace=font,
        fontScale=1.0,
        thickness=2,
        color=(255, 255, 255),
        lineType=cv2.LINE_AA,
    )
    return img


def _layout_to_pil(img):
    """Convert a BGR array to an RGB PIL image (None passes through)."""
    if img is None:
        return None
    return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))


def create_layouts(
    image, left_division, right_division, num_of_speakers, num_divisions=20
):
    """
    Create different layout variations of the image using specific aspect ratios.

    The image is first cut down to the columns between the two divisions.
    Persons are detected in that cutout with the module-level YOLO model and
    every crop is positioned relative to the merged box of the largest
    num_of_speakers persons (or the cutout centre when nobody is detected).

    Args:
        image: PIL Image (a BGR numpy array is also accepted)
        left_division: Left division index (1-based; values below 1 are
            clamped to the left image edge)
        right_division: Right division index (1-based, inclusive)
        num_of_speakers: Number of largest persons to frame
        num_divisions: Number of equal-width divisions the image was split
            into (default 20)
    Returns:
        tuple: (standard_crops, threehalfs_layouts, twothirdhalfs_layouts,
        twoequalhalfs_layouts, visualization_data). The first four are dicts
        of PIL images; visualization_data holds the detected centre, person
        top/height and the cutout bounds.
    Raises:
        ValueError: If the divisions select no columns of the image.
    """
    # Convert PIL Image to cv2 (BGR) format
    if isinstance(image, Image.Image):
        image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    else:
        image_cv = image.copy()
    width = image_cv.shape[1]

    # Column range selected by the divisions. Clamping keeps an out-of-range
    # index from producing a negative slice start that wraps around.
    division_width = width / num_divisions
    left_boundary = min(width, max(0, int((left_division - 1) * division_width)))
    right_boundary = min(width, max(0, int(right_division * division_width)))
    if right_boundary <= left_boundary:
        raise ValueError(
            f"divisions {left_division!r}..{right_division!r} select no columns "
            f"of a {width}px wide image"
        )

    # 1. Cutout between the two divisions
    cutout_image = image_cv[:, left_boundary:right_boundary].copy()
    cutout_height, cutout_width = cutout_image.shape[:2]

    # 2. Person detection on the cutout; defaults describe "no detection".
    cutout_center_x = cutout_width // 2
    cutout_center_y = cutout_height // 2
    person_top = 0.0
    person_height = float(cutout_height)
    results = model(cutout_image, classes=[0], conf=0.6)
    if results and len(results[0].boxes) > 0:
        boxes = results[0].boxes.xyxy.cpu().numpy()
        left_x, top_y, right_x, bottom_y = _layout_merge_largest_boxes(
            boxes, num_of_speakers
        )
        cutout_center_x = int((left_x + right_x) // 2)
        cutout_center_y = int((top_y + bottom_y) // 2)
        person_top = top_y
        person_height = bottom_y - top_y

    # 3. Standard 16:9 and 9:16 crops
    aspect_16_9 = 16 / 9
    aspect_9_16 = 9 / 16

    target_height_16_9 = int(cutout_width / aspect_16_9)
    if target_height_16_9 <= cutout_height:
        # Full width; start 5% of the person height above the person's top.
        top_margin = int(person_height * 0.05)
        cutout_16_9 = _layout_crop_rows(
            cutout_image, target_height_16_9, person_top - top_margin
        )
    else:
        # Cutout is too wide for its height: keep full height, trim the width.
        cutout_16_9 = _layout_crop_cols(
            cutout_image, int(cutout_height * aspect_16_9), cutout_center_x
        )

    target_width_9_16 = int(cutout_height * aspect_9_16)
    if target_width_9_16 <= cutout_width:
        # Full height, centred horizontally on the person.
        cutout_9_16 = _layout_crop_cols(
            cutout_image, target_width_9_16, cutout_center_x
        )
    else:
        # Cutout is too narrow: keep full width and trim the height around a
        # centre moved up by 20% of the person height.
        new_height = int(cutout_width / aspect_9_16)
        adjusted_center_y = int(cutout_center_y - (person_height * 0.2))
        cutout_9_16 = _layout_crop_rows(
            cutout_image, new_height, adjusted_center_y - new_height // 2
        )

    # 4. Data needed to draw the regions on the original image
    visualization_data = {
        "original_center_x": left_boundary + cutout_center_x,
        "original_center_y": cutout_center_y,
        "original_person_top": person_top,
        "original_person_height": person_height,
        "cutout_bounds": (left_boundary, right_boundary),
    }

    # 5. Layout segments. Every segment is positioned on the subject on its
    # own, so the segments of one layout show the same region.
    aspect_5_3_9 = 5.3 / 9
    aspect_10_6_9 = 10.6 / 9
    aspect_8_9 = 8 / 9
    aspect_9_5_3 = 9 / 5.3
    aspect_9_10_6 = 9 / 10.6
    aspect_9_8 = 9 / 8

    cutout_9_16_height, cutout_9_16_width = cutout_9_16.shape[:2]
    adjusted_9_16_center_y = int(cutout_9_16_height // 2 - (person_height * 0.2))

    def wide_segment(aspect, text):
        # Full-height band of the cutout, centred on the person.
        return _layout_crop_cols(
            cutout_image, int(cutout_height * aspect), cutout_center_x, text
        )

    def tall_segment(aspect, start_y, text):
        # Full-width band of the 9:16 crop starting near start_y.
        return _layout_crop_rows(
            cutout_9_16, int(cutout_9_16_width / aspect), start_y, text
        )

    # ----- threehalfs: three 5.3:9 columns / three 9:5.3 rows -----
    threehalfs_16_9_segments = [
        wide_segment(aspect_5_3_9, f"Part {i+1}") for i in range(3)
    ]
    threehalfs_9_16_segments = [
        tall_segment(aspect_9_5_3, person_top, f"Part {i+1}") for i in range(3)
    ]

    # ----- twothirdhalfs: one large and one small segment, both orders -----
    large_wide = wide_segment(aspect_10_6_9, "10.6:9")
    small_wide = wide_segment(aspect_5_3_9, "5.3:9")
    twothirdhalfs_16_9_var1_segments = [large_wide, small_wide]
    twothirdhalfs_16_9_var2_segments = [small_wide.copy(), large_wide.copy()]

    large_tall_height = int(cutout_9_16_width / aspect_9_10_6)
    large_tall = tall_segment(
        aspect_9_10_6,
        adjusted_9_16_center_y - large_tall_height // 2,
        "9:10.6",
    )
    small_tall = tall_segment(aspect_9_5_3, person_top, "9:5.3")
    twothirdhalfs_9_16_var1_segments = [large_tall, small_tall]
    twothirdhalfs_9_16_var2_segments = [small_tall.copy(), large_tall.copy()]

    # ----- twoequalhalfs: two identical regions -----
    # Each half is cropped and labelled separately; copying an already
    # labelled half would print "(2)" on top of "(1)".
    twoequalhalfs_16_9_segments = [
        wide_segment(aspect_8_9, "8:9 (1)"),
        wide_segment(aspect_8_9, "8:9 (2)"),
    ]
    equal_start_y = max(0, person_top - person_height * 0.05)
    twoequalhalfs_9_16_segments = [
        tall_segment(aspect_9_8, equal_start_y, "9:8 (1)"),
        tall_segment(aspect_9_8, equal_start_y, "9:8 (2)"),
    ]

    # 6. Join the segments: 16:9 layouts side by side, 9:16 layouts stacked,
    # each with a banner naming the layout.
    def wide_layout(segments, text):
        return _layout_to_pil(
            _layout_banner(_layout_stack(segments, horizontal=True), text)
        )

    def tall_layout(segments, text):
        return _layout_to_pil(
            _layout_banner(_layout_stack(segments, horizontal=False), text)
        )

    standard_crops = {
        "cutout": _layout_to_pil(_layout_banner(cutout_image.copy(), "Cutout")),
        "16:9": _layout_to_pil(_layout_banner(cutout_16_9.copy(), "16:9")),
        "9:16": _layout_to_pil(_layout_banner(cutout_9_16.copy(), "9:16")),
    }
    threehalfs_layouts = {
        "16:9": wide_layout(threehalfs_16_9_segments, "Three Halfs 16:9"),
        "9:16": tall_layout(threehalfs_9_16_segments, "Three Halfs 9:16"),
    }
    twothirdhalfs_layouts = {
        "16:9_var1": wide_layout(
            twothirdhalfs_16_9_var1_segments, "Two Thirds Var1 16:9"
        ),
        "16:9_var2": wide_layout(
            twothirdhalfs_16_9_var2_segments, "Two Thirds Var2 16:9"
        ),
        "9:16_var1": tall_layout(
            twothirdhalfs_9_16_var1_segments, "Two Thirds Var1 9:16"
        ),
        "9:16_var2": tall_layout(
            twothirdhalfs_9_16_var2_segments, "Two Thirds Var2 9:16"
        ),
    }
    twoequalhalfs_layouts = {
        "16:9": wide_layout(twoequalhalfs_16_9_segments, "Two Equal Halfs 16:9"),
        "9:16": tall_layout(twoequalhalfs_9_16_segments, "Two Equal Halfs 9:16"),
    }
    return (
        standard_crops,
        threehalfs_layouts,
        twothirdhalfs_layouts,
        twoequalhalfs_layouts,
        visualization_data,
    )
def draw_layout_regions(
    image, left_division, right_division, visualization_data, layout_type
):
    """
    Overlay the crop regions of one layout family on a copy of the image.

    The input image is not modified. Rectangles and labels are drawn for the
    requested layout, then the subject's center point is marked with a white
    dot outlined in black.

    Args:
        image: PIL Image (converted from RGB to BGR for drawing), or any other
            object exposing ``.copy()`` and ``.shape`` (assumed to be a BGR
            numpy array - TODO confirm with callers).
        left_division: Left division index (1-20). Not read by this function;
            the horizontal crop bounds come from
            ``visualization_data["cutout_bounds"]``. Kept so the call
            signature stays stable.
        right_division: Right division index (1-20). Not read, see above.
        visualization_data: Dictionary with visualization data from
            create_layouts. The keys read here are "original_center_x",
            "original_center_y", "original_person_top",
            "original_person_height" and "cutout_bounds" (a ``(left, right)``
            pair of x coordinates).
        layout_type: "standard", "threehalfs", "twothirdhalfs_var1",
            "twothirdhalfs_var2" or "twoequalhalfs". Any other value draws
            only the center marker.

    Returns:
        PIL Image: Copy of the input with the layout regions visualized.
    """
    # OpenCV draws in place on BGR arrays, so work on a private BGR copy.
    if isinstance(image, Image.Image):
        canvas = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    else:
        canvas = image.copy()
    height, width = canvas.shape[:2]

    # cv2 drawing calls only accept integer pixel coordinates, so the values
    # that are used directly as coordinates are normalized once, up front.
    center_x = int(visualization_data["original_center_x"])
    center_y = int(visualization_data["original_center_y"])
    person_top = visualization_data["original_person_top"]
    person_height = visualization_data["original_person_height"]
    left_boundary, right_boundary = (
        int(bound) for bound in visualization_data["cutout_bounds"]
    )
    cutout_width = right_boundary - left_boundary

    # Colors per layout and aspect ratio, in OpenCV's BGR channel order.
    colors = {
        "standard": {"16:9": (0, 255, 0), "9:16": (255, 0, 0)},  # Green, Blue
        "threehalfs": {"16:9": (0, 165, 255), "9:16": (255, 255, 0)},  # Orange, Cyan
        "twothirdhalfs_var1": {
            "16:9": (255, 0, 255),
            "9:16": (128, 0, 128),
        },  # Magenta, Purple
        "twothirdhalfs_var2": {
            "16:9": (0, 255, 255),
            "9:16": (128, 128, 0),
        },  # Yellow, Teal
        "twoequalhalfs": {
            "16:9": (0, 128, 128),
            "9:16": (255, 165, 0),
        },  # Olive, Azure
    }

    thickness = 3
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.8
    font_thickness = 2

    def labeled_box(top_left, bottom_right, color, label, label_origin):
        """Draw a rectangle outline and its text label in the same color."""
        cv2.rectangle(canvas, top_left, bottom_right, color, thickness)
        cv2.putText(
            canvas, label, label_origin, font, font_scale, color, font_thickness
        )

    def clamped_span(center, size, limit):
        """Return (start, end) of a span of ``size`` centered on ``center``,
        shifted/clipped so that it lies within ``[0, limit]``."""
        start = int(max(0, min(limit - size, center - size // 2)))
        end = int(min(limit, start + size))
        return start, end

    # Full-height 9:16 column centered horizontally on the subject. Every
    # layout's 9:16 variant is drawn inside this column. The right edge is
    # deliberately not clamped to the image width (cv2 clips when drawing).
    column_width = int(height * (9 / 16))
    column_left = max(0, min(width - column_width, center_x - column_width // 2))
    column_right = column_left + column_width

    if layout_type == "standard":
        # 16:9 crop: spans the cutout bounds horizontally; vertically it
        # starts slightly above the person's top edge.
        wide_color = colors["standard"]["16:9"]
        aspect_16_9 = 16 / 9
        target_height_16_9 = int(cutout_width / aspect_16_9)
        # Top margin is 5% of the person height.
        top_margin = int(person_height * 0.05)
        y_start = int(max(0, person_top - top_margin))
        # Slide the window up if it would run past the bottom of the image.
        if y_start + target_height_16_9 > height:
            y_start = int(max(0, height - target_height_16_9))
        y_end = int(min(height, y_start + target_height_16_9))
        labeled_box(
            (left_boundary, y_start),
            (right_boundary, y_end),
            wide_color,
            "16:9",
            (left_boundary + 5, y_start + 30),
        )

        # 9:16 crop: the full-height column.
        labeled_box(
            (column_left, 0),
            (column_right, height),
            colors["standard"]["9:16"],
            "9:16",
            (column_left + 5, 30),
        )

    elif layout_type == "threehalfs":
        # 16:9: three full-height 5.3:9 segments shown side by side. This
        # placement is for visualization only; the dashed lines further down
        # mark where a single segment centered on the subject would sit.
        wide_color = colors["threehalfs"]["16:9"]
        segment_width = int(height * (5.3 / 9))
        total_width = segment_width * 3
        start_x = max(0, center_x - total_width // 2)
        for index in range(3):
            vis_left = start_x + index * segment_width
            labeled_box(
                (vis_left, 0),
                (vis_left + segment_width, height),
                wide_color,
                f"16:9 Part {index + 1}",
                (vis_left + 5, 30 + index * 30),  # stagger labels vertically
            )

        # Subject-centered segment, drawn as thin dashed vertical edges. The
        # position is the same for every part, so it is drawn a single time.
        actual_left, actual_right = clamped_span(center_x, segment_width, width)
        for y in range(0, height, 20):
            if y % 40 < 20:  # draw every other 20px piece -> dashed line
                y_stop = min(y + 20, height)
                cv2.line(
                    canvas, (actual_left, y), (actual_left, y_stop), wide_color, 1
                )
                cv2.line(
                    canvas, (actual_right, y), (actual_right, y_stop), wide_color, 1
                )

        # 9:16: three 9:5.3 segments stacked inside the 9:16 column, with the
        # stack centered vertically in the image (visualization only).
        tall_color = colors["threehalfs"]["9:16"]
        aspect_9_5_3 = 9 / 5.3
        stacked_height = int(column_width / aspect_9_5_3)
        total_height = stacked_height * 3
        start_y = max(0, height // 2 - total_height // 2)
        for index in range(3):
            vis_top = start_y + index * stacked_height
            vis_bottom = min(height, vis_top + stacked_height)
            labeled_box(
                (column_left, vis_top),
                (column_right, vis_bottom),
                tall_color,
                f"9:16 Part {index + 1}",
                (column_left + 5, vis_top + 30),
            )

        # Subject-centered segment, drawn as thin dashed horizontal edges.
        actual_top, actual_bottom = clamped_span(center_y, stacked_height, height)
        for x in range(column_left, column_right, 20):
            if x % 40 < 20:  # draw every other 20px piece -> dashed line
                x_stop = min(x + 20, column_right)
                cv2.line(
                    canvas, (x, actual_top), (x_stop, actual_top), tall_color, 1
                )
                cv2.line(
                    canvas, (x, actual_bottom), (x_stop, actual_bottom), tall_color, 1
                )

    elif layout_type == "twothirdhalfs_var1":
        # NOTE(review): var1 only ever draws the 16:9 arrangement and var2
        # only the 9:16 one (this is how the layout type was mapped to an
        # aspect key before). Confirm that this pairing is intended.
        layout_color = colors["twothirdhalfs_var1"]["16:9"]
        # Two full-height segments: a wide 10.6:9 one and a narrow 5.3:9 one.
        segment1_width = int(height * (10.6 / 9))
        segment2_width = int(height * (5.3 / 9))

        # First segment: centered a quarter of segment 2's width left of the
        # subject.
        x_start, x_end = clamped_span(
            center_x - segment2_width // 4, segment1_width, width
        )
        labeled_box(
            (x_start, 0),
            (x_end, height),
            layout_color,
            "16:9 Part 1",
            (x_start + 5, 30),
        )

        # Second segment: centered a quarter of segment 1's width right of
        # the subject.
        x_start, x_end = clamped_span(
            center_x + segment1_width // 4, segment2_width, width
        )
        labeled_box(
            (x_start, 0),
            (x_end, height),
            layout_color,
            "16:9 Part 2",
            (x_start + 5, 60),
        )

    elif layout_type == "twothirdhalfs_var2":
        layout_color = colors["twothirdhalfs_var2"]["9:16"]
        # Two segments stacked from the top of the 9:16 column: a short
        # 9:5.3 one followed by a tall 9:10.6 one.
        aspect_9_10_6 = 9 / 10.6
        aspect_9_5_3 = 9 / 5.3
        segment1_height = int(column_width / aspect_9_5_3)
        segment2_height = int(column_width / aspect_9_10_6)

        # First segment (top)
        first_bottom = min(height, segment1_height)
        labeled_box(
            (column_left, 0),
            (column_right, first_bottom),
            layout_color,
            "9:16 Part 1",
            (column_left + 5, 30),
        )

        # Second segment (bottom) starts where the first one ends.
        second_bottom = min(height, first_bottom + segment2_height)
        labeled_box(
            (column_left, first_bottom),
            (column_right, second_bottom),
            layout_color,
            "9:16 Part 2",
            (column_left + 5, first_bottom + 30),
        )

    elif layout_type == "twoequalhalfs":
        # 16:9: two full-height 8:9 segments, centered half a segment width
        # to the left and to the right of the subject.
        wide_color = colors["twoequalhalfs"]["16:9"]
        segment_width = int(height * (8 / 9))

        x_start, x_end = clamped_span(
            center_x - segment_width // 2, segment_width, width
        )
        labeled_box(
            (x_start, 0),
            (x_end, height),
            wide_color,
            "16:9 Equal 1",
            (x_start + 5, 30),
        )

        x_start, x_end = clamped_span(
            center_x + segment_width // 2, segment_width, width
        )
        labeled_box(
            (x_start, 0),
            (x_end, height),
            wide_color,
            "16:9 Equal 2",
            (x_start + 5, 60),
        )

        # 9:16: two 9:8 segments stacked from the top of the 9:16 column.
        tall_color = colors["twoequalhalfs"]["9:16"]
        aspect_9_8 = 9 / 8
        stacked_height = int(column_width / aspect_9_8)

        first_bottom = min(height, stacked_height)
        labeled_box(
            (column_left, 0),
            (column_right, first_bottom),
            tall_color,
            "9:16 Equal 1",
            (column_left + 5, 30),
        )

        second_bottom = min(height, first_bottom + stacked_height)
        labeled_box(
            (column_left, first_bottom),
            (column_right, second_bottom),
            tall_color,
            "9:16 Equal 2",
            (column_left + 5, first_bottom + 30),
        )

    # Mark the center point of the person(s): filled white dot, black outline.
    center_radius = 8
    cv2.circle(canvas, (center_x, center_y), center_radius, (255, 255, 255), -1)
    cv2.circle(canvas, (center_x, center_y), center_radius, (0, 0, 0), 2)

    # Convert back to PIL format (RGB).
    return Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
def get_image_crop(cid=None, rsid=None, uid=None, ct=None):
    """
    Build a gallery with the standard crops and layout variations of every sprite.

    Sprite sheet URLs are requested from ``get_sprite_firebase``; if that
    lookup fails for any reason, a fixed set of local sample sprite sheets is
    used instead. For each sprite sheet the middle thumbnail is extracted,
    annotated with numbered divisions, analyzed for crop divisions, and turned
    into layouts. Images that cannot be loaded are reported and skipped.

    Args:
        cid: Forwarded to ``get_sprite_firebase``.
        rsid: Forwarded to ``get_sprite_firebase``.
        uid: Forwarded to ``get_sprite_firebase``.
        ct: Forwarded to ``analyze_image`` as its last argument.

    Returns:
        gr.Gallery: Gallery of all generated images, each with a caption that
        includes the image size.
    """
    # Number of ruler divisions drawn on the thumbnail; the division indices
    # returned by the analysis are clamped to 1..num_divisions.
    num_divisions = 20

    try:
        sprites_data = get_sprite_firebase(cid, rsid, uid)
        image_paths = [sprite_data["url"] for sprite_data in sprites_data]
    except Exception as e:
        # Deliberate best-effort fallback (e.g. when running without Firebase
        # access), but report why it happened instead of hiding it.
        print(f"Could not load sprites from Firebase, using sample images: {e}")
        image_paths = [
            "data/F2-Roll4D-i2x-Take2-Nov19.24-PST02.31.31pm.jpg",
            "data/G2-Roll5D-i2x-Take2-Nov19.24-PST02.31.31pm.jpg",
            "data/C1-Roll10D-i1x-Take2-Mar20.25-PST12.14.56pm.jpg",
            "data/C2-Roll10D-i2x-Take2-Mar20.25-PST12.14.56pm.jpg",
        ]

    # (image, caption) pairs in display order.
    gallery_items = []

    for image_path in image_paths:
        # Load image (from local file or URL)
        try:
            if image_path.startswith(("http://", "https://")):
                # Without a timeout a stalled server would block forever.
                response = requests.get(image_path, timeout=30)
                # Fail with a clear HTTP error rather than letting PIL choke
                # on an error page.
                response.raise_for_status()
                input_image = Image.open(BytesIO(response.content))
            else:
                input_image = Image.open(image_path)
        except Exception as e:
            print(f"Error loading image {image_path}: {e}")
            continue

        # Get the middle thumbnail
        mid_image = get_middle_thumbnail(input_image)

        # Add numbered divisions along the top so the analysis step can refer
        # to crop positions by division index.
        numbered_mid_image = add_top_numbers(
            input_image=mid_image,
            num_divisions=num_divisions,
            margin=50,
            font_size=30,
            dot_spacing=20,
        )

        # Analyze the image to get the crop divisions and speaker count
        # (per the original author's note this step uses GPT-4V).
        (left_division, right_division, num_of_speakers) = analyze_image(
            numbered_mid_image, remove_unwanted_prompt(1), mid_image, ct
        )

        # Safety check for divisions: clamp into range, and fall back to the
        # full width when the pair is empty or inverted.
        left_division = max(1, left_division)
        right_division = min(num_divisions, right_division)
        if left_division >= right_division:
            left_division = 1
            right_division = num_divisions
        print(f"Using divisions: left={left_division}, right={right_division}")

        # Create layouts and cutouts
        (
            standard_crops,
            threehalfs_layouts,
            twothirdhalfs_layouts,
            twoequalhalfs_layouts,
            visualization_data,
        ) = create_layouts(mid_image, left_division, right_division, num_of_speakers)

        # Standard aspect ratio visualization (16:9 and 9:16) goes first
        standard_visualization = draw_layout_regions(
            mid_image, left_division, right_division, visualization_data, "standard"
        )
        gallery_items.append(
            (
                standard_visualization,
                f"Standard Aspect Ratios (16:9 & 9:16) {standard_visualization.size}",
            )
        )

        # Add input and middle image to gallery
        gallery_items.append((input_image, f"Input Image {input_image.size}"))
        gallery_items.append((mid_image, f"Middle Thumbnail {mid_image.size}"))

        # Add standard crops
        gallery_items.extend(
            (crop, f"{key} {crop.size}") for key, crop in standard_crops.items()
        )
        # Add threehalfs layouts
        gallery_items.extend(
            (layout, f"Three Halfs {key} {layout.size}")
            for key, layout in threehalfs_layouts.items()
        )
        # Add twothirdhalfs layouts
        gallery_items.extend(
            (layout, f"Two-Thirds Halfs {key} {layout.size}")
            for key, layout in twothirdhalfs_layouts.items()
        )
        # Add twoequalhalfs layouts
        gallery_items.extend(
            (layout, f"Two Equal Halfs {key} {layout.size}")
            for key, layout in twoequalhalfs_layouts.items()
        )

    # Return gallery with all images
    return gr.Gallery(value=gallery_items)