# Video Augmentation using META SAM-2 Model with YOLO model and Stability AI

### Importing Images with Annotated Text Files for YOLOv8n Model Training

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root_dir, _subdirs, file_list in os.walk('/kaggle/input'):
    for file_name in file_list:
        print(os.path.join(root_dir, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Upload your image directory, with the `.txt` annotation files in the format YOLO requires for training, together with the video on which the model should predict.

### To use a pretrained YOLO model instead, jump to the pretrained-model section; to enter the coordinates on a frame manually, jump to the video-segmenting section.

### Installing Required Libraries

In [None]:
!pip install ultralytics opencv-python
!pip install -U ipywidgets

#  Yolov8n Model training 

## Yaml file creation and model training


In [None]:
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt

# Build a YOLOv8 nano model from its architecture file.
# NOTE(review): a .yaml spec (rather than a .pt checkpoint) presumably means the
# weights start untrained — confirm this is intended for such a small dataset.
model = YOLO('yolov8n.yaml')

# Create a dataset.yaml file for YOLOv8 training.
# NOTE(review): `train` and `val` point at the same directory, so validation
# metrics are computed on the training images.
dataset_yaml_content = """
train: "/kaggle/input/yolov-train-data/Bottle"
val: "/kaggle/input/yolov-train-data/Bottle"
nc: 1  # Number of classes (1 in this case)
names: ['bottle']
"""

# Save the dataset.yaml file into the current working directory
with open('dataset.yaml', 'w') as f:
    f.write(dataset_yaml_content)

    

# Train the model with the specified dataset and parameters
model.train(
    data='dataset.yaml',  # Path to the dataset.yaml file
    epochs=100,           # Increase epochs for better results with small datasets
    imgsz=1024,           # Training image size
    batch=1,              # Batch size of 1 (one image per step) due to limited data
    patience=50,          # Early stopping if no improvement
    lr0=0.0001,            # Start with a lower learning rate
    augment=True,  # Enable data augmentation
#     weights='yolov8n.pt'  # Start training with pre-trained weights (optional)
)


### Note: when running on Kaggle you may be prompted to enter a wandb.ai API key

## prediction on an Image

In [None]:
# Load a test image. OpenCV returns it in BGR channel order, or None when the
# file cannot be read, so fail early with a clear message in that case.
test_image_path = '/kaggle/input/yolov-train-data/Bottle/IMG202408142240012.jpg'
img = cv2.imread(test_image_path)
if img is None:
    raise FileNotFoundError(f"Could not read test image: {test_image_path}")

# Predict
results = model.predict(img)

# `plot` returns the annotated image in BGR order; matplotlib expects RGB, so
# convert before displaying, otherwise red and blue are swapped.
annotated_rgb = cv2.cvtColor(results[0].plot(), cv2.COLOR_BGR2RGB)
plt.imshow(annotated_rgb)
plt.axis('off')
plt.show()

## Predicting on Video & detecting the First Frame, and its center coordinates

In [None]:
# Scan the video frame by frame and stop at the first frame in which the
# trained model detects anything. Results are left in the notebook globals
# `frame_number`, `x_center`, `y_center` and `object_detected`.
video_path = '/kaggle/input/yolov-train-data/VID202408142242002.mp4'
cap = cv2.VideoCapture(video_path)

x_center = 0
y_center = 0
# 0-based index of the frame currently being examined. It must be 0-based
# because it is later used to index extracted frames, which are numbered from 0.
frame_number = 0
object_detected = False

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Run YOLOv8 detection
        results = model(frame)

        for r in results:
            if len(r.boxes) > 0:  # at least one object was detected
                # Use the first reported box
                x1, y1, x2, y2 = r.boxes[0].xyxy[0].cpu().numpy()

                # Centre of the bounding box, in pixel coordinates
                x_center = int((x1 + x2) / 2)
                y_center = int((y1 + y2) / 2)

                print(f"First detection at frame index: {frame_number}")
                print(f"Center coordinates: (x={x_center}, y={y_center})")

                object_detected = True
                break

        if object_detected:
            break

        # Only advance once the current frame has been ruled out
        frame_number += 1
finally:
    # Release the capture even if detection raises
    cap.release()

if not object_detected:
    print("No object detected in the video.")


In [None]:
# Echo the centre coordinates left by the detection cell above
# (both stay 0 if nothing was detected).
print("x_center:",x_center)
print("y_center:",y_center)

# Using Yolov8s pretrained model for direct detection and getting the frame

#### just mention class name and it will return frame no. and coordinates

In [None]:
# Load the YOLOv8s checkpoint; the requested class must be one of `model.names`
model = YOLO('yolov8s.pt')

# Class to look for and the minimum confidence accepted for it
target_class = 'bottle'
confidence_threshold = 0.8

# Process the video
video_path = '/kaggle/input/yolov-train-data/VID202408142242002.mp4'
cap = cv2.VideoCapture(video_path)

x_center = 0
y_center = 0
# 0-based index of the frame currently being examined. It must be 0-based
# because it is later used to index extracted frames, which are numbered from 0.
frame_number = 0
object_detected = False

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Run YOLOv8 detection
        results = model(frame)

        for r in results:
            for box in r.boxes:
                # Class label and confidence of this detection
                cls = int(box.cls[0].cpu().numpy())
                class_name = model.names[cls]
                confidence = float(box.conf[0].cpu().numpy())

                if class_name == target_class and confidence > confidence_threshold:
                    # Centre of the bounding box, in pixel coordinates
                    x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                    x_center = int((x1 + x2) / 2)
                    y_center = int((y1 + y2) / 2)

                    print(f"First {target_class} detection at frame index: {frame_number}")
                    print(f"Center coordinates: (x={x_center}, y={y_center}) with confidence {confidence:.2f}")

                    object_detected = True
                    break  # stop scanning boxes

            if object_detected:
                break  # stop scanning results

        if object_detected:
            break  # stop scanning frames

        # Only advance once the current frame has been ruled out
        frame_number += 1
finally:
    # Release the capture even if detection raises
    cap.release()

# Report using the configured class and threshold rather than hard-coded values
if not object_detected:
    print(f"No '{target_class}' detected in the video with confidence greater than {confidence_threshold}.")


In [None]:
# Echo the values left by the detection cell above. If nothing was detected,
# the coordinates are still 0 and frame_number is wherever the scan stopped.
print("x_center:",x_center)
print("y_center:",y_center)
print("Frame No.:",frame_number)

#### clearing GPU cache

In [None]:
import torch
# Ask PyTorch's CUDA caching allocator to release cached memory it is not using.
# NOTE(review): `model` from the YOLO cells is still referenced here, so its
# weights presumably remain allocated — confirm whether it should be deleted first.
torch.cuda.empty_cache()
print("Done")

# Video segmenting

### importing SAM-2 model (may take a while to download)

In [None]:
# Clone SAM-2, install it in editable mode, download the checkpoints, then
# return to the repository root.
# NOTE(review): the absolute %cd paths assume the clone ran from /kaggle/working,
# and re-running this cell will hit the already-existing clone directory.
!git clone https://github.com/facebookresearch/segment-anything-2.git
%cd /kaggle/working/segment-anything-2
%pip install -e .
%cd /kaggle/working/segment-anything-2/checkpoints
!bash /kaggle/working/segment-anything-2/checkpoints/download_ckpts.sh
%cd /kaggle/working/segment-anything-2

In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# Use float16 autocast on CUDA for the rest of the notebook. The context is
# entered manually and never exited, so it stays active for all later cells.
torch.autocast(device_type="cuda", dtype=torch.float16).__enter__()

# Requires a CUDA device: properties of device 0 are queried unconditionally.
if torch.cuda.get_device_properties(0).major >= 8:
    # turn on tfloat32 for Ampere GPUs (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

## video to frames

In [None]:
import cv2
import os
import shutil

def video_to_frames(video_path, output_folder):
    """Extract every frame of a video into `output_folder` as numbered JPEGs.

    Frames are written as `00000.jpg`, `00001.jpg`, ... in read order. Any
    existing `output_folder` is deleted and recreated first.

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that will receive the frames.

    Returns:
        Number of frames written.

    Raises:
        IOError: If the video cannot be opened or a frame cannot be written.
    """
    video_capture = cv2.VideoCapture(video_path)
    # Check the video opens BEFORE wiping the output folder, so a wrong path
    # does not destroy previously extracted frames and report "0 frames".
    if not video_capture.isOpened():
        video_capture.release()
        raise IOError(f"Could not open video: {video_path}")

    frame_count = 0
    try:
        # Ensure the output folder is clean
        if os.path.exists(output_folder):
            shutil.rmtree(output_folder)
        os.makedirs(output_folder)

        while True:
            success, frame = video_capture.read()
            if not success:
                break
            # Zero-padded names keep frames in order when sorted
            frame_filename = os.path.join(output_folder, f"{frame_count:05d}.jpg")
            # imwrite reports failure through its return value, not an exception
            if not cv2.imwrite(frame_filename, frame):
                raise IOError(f"Failed to write frame: {frame_filename}")
            frame_count += 1
    finally:
        # Release the capture even if writing fails part-way
        video_capture.release()

    print(f"Extracted {frame_count} frames to {output_folder}")
    return frame_count

# Example usage
# Extract the frames of the video to be segmented. `output_folder` and
# `total_frames` are reused by the reordering cell further down.
video_path = "/kaggle/input/shaolin-soccer/Untitled video - Made with Clipchamp.mp4"
output_folder = "/kaggle/working/output_frames"
total_frames = video_to_frames(video_path, output_folder)


## reordering Frames to video propagation


In [1]:
# Index of the frame on which the object will be annotated. This overwrites
# any value left in `frame_number` by the YOLO detection cells.
frame_number =0 

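### If you ran one of the YOLO detection cells, skip the cell above so that `frame_number` keeps the detected frame index instead of being reset to 0

#### (that is, keep `frame_number = frame_number`)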

In [None]:
import os
import shutil

def reorder_frames(video_dir, ann_frame_idx, output_dir):
    """Copy frames into `output_dir`, rotated so that `ann_frame_idx` becomes frame 0.

    Frames from `ann_frame_idx` onward keep their relative order and come
    first; frames before `ann_frame_idx` are appended after them. Files are
    renamed to zero-padded `NNNNN.jpg`.

    Args:
        video_dir: Directory of JPEG frames whose file stems are integers.
        ann_frame_idx: 0-based index (in sorted order) of the frame that should
            become frame 0. Must satisfy 0 <= ann_frame_idx <= number of frames.
        output_dir: Destination directory; deleted and recreated.

    Returns:
        Number of entries in `output_dir` after copying.

    Raises:
        ValueError: If `ann_frame_idx` is out of range, or `output_dir` is the
            same directory as `video_dir`.
    """
    # Wiping the destination would delete the source frames
    if os.path.abspath(video_dir) == os.path.abspath(output_dir):
        raise ValueError("output_dir must differ from video_dir")

    # Get and sort the list of frame filenames numerically by stem
    frame_names = [
        p for p in os.listdir(video_dir)
        if os.path.splitext(p)[-1] in [".jpg", ".jpeg", ".JPG", ".JPEG"]
    ]
    frame_names.sort(key=lambda p: int(os.path.splitext(p)[0]))
    total_frames = len(frame_names)

    # Validate before touching the output directory; an out-of-range index
    # would otherwise produce negative or misplaced frame numbers.
    if not 0 <= ann_frame_idx <= total_frames:
        raise ValueError(
            f"ann_frame_idx={ann_frame_idx} is outside the valid range 0..{total_frames}"
        )

    # Ensure the output directory is clean
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    # Rotate indices: i -> (i - ann_frame_idx) modulo total_frames
    for i, name in enumerate(frame_names):
        new_idx = (i - ann_frame_idx) % total_frames
        old_path = os.path.join(video_dir, name)
        new_path = os.path.join(output_dir, f"{new_idx:05d}.jpg")
        shutil.copy2(old_path, new_path)

    print(f"Frames reordered and copied to {output_dir} successfully.")
    return len(os.listdir(output_dir))

# Rotate the extracted frames so that the annotated frame becomes frame 0.
reordered_dir = "/kaggle/working/reordered_frames"
ann_frame_idx = frame_number  # Frame index to start as 0
reordered_count = reorder_frames(output_folder, ann_frame_idx, reordered_dir)

# Verify total frame consistency between extraction and reordering
if total_frames == reordered_count:
    print("Frame count matches after reordering.")
else:
    print(f"Frame count mismatch! Extracted: {total_frames}, Reordered: {reordered_count}")


## Importing Model and creating predictor

In [None]:
from sam2.build_sam import build_sam2_video_predictor

# Checkpoint path inside the cloned repository, and the config name passed to
# the builder alongside it.
sam2_checkpoint = "/kaggle/working/segment-anything-2/checkpoints/sam2_hiera_base_plus.pt"
model_cfg = "sam2_hiera_b+.yaml"

# Build the video predictor that every later segmentation cell reuses
predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint)

## checking image where object is detected

In [None]:
# Alias of the chosen frame index; used further down in this cell to pick the
# frame that is previewed.
frame_no = frame_number

def show_mask(mask, ax, obj_id=None, random_color=False):
    """Draw `mask` on `ax` as a translucent single-colour overlay.

    The colour is random when `random_color` is true; otherwise it is the
    "tab10" colormap entry for `obj_id` (entry 0 when `obj_id` is None).
    Alpha is fixed at 0.6. The last two axes of `mask` are used as height
    and width.
    """
    if not random_color:
        palette = plt.get_cmap("tab10")
        palette_idx = obj_id if obj_id is not None else 0
        color = np.array([*palette(palette_idx)[:3], 0.6])
    else:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    height, width = mask.shape[-2:]
    # Broadcast (H, W, 1) against (1, 1, 4) to get an RGBA overlay
    overlay = mask.reshape(height, width, 1) * color.reshape(1, 1, -1)
    ax.imshow(overlay)


def show_points(coords, labels, ax, marker_size=200):
    """Plot prompt points on `ax` as stars: label 1 in green, label 0 in red.

    `coords` rows are (x, y) pairs; `labels` selects rows by equality.
    """
    # Positive points are drawn first, then negative ones
    for label_value, point_color in ((1, 'green'), (0, 'red')):
        selected = coords[labels == label_value]
        ax.scatter(selected[:, 0], selected[:, 1], color=point_color, marker='*',
                   s=marker_size, edgecolor='white', linewidth=1.25)
    
# `video_dir` a directory of JPEG frames with filenames like `<frame_index>.jpg`
video_dir = "/kaggle/working/reordered_frames"

# scan all the JPEG frame names in this directory
frame_names = [
    p for p in os.listdir(video_dir)
    if os.path.splitext(p)[-1] in [".jpg", ".jpeg", ".JPG", ".JPEG"]
]
frame_names.sort(key=lambda p: int(os.path.splitext(p)[0]))

# The frames in `video_dir` were rotated so that the chosen frame is at index
# 0. Preview index 0 here; indexing with the original frame number would show
# a different frame whenever that number is not 0.
frame_idx = 0
plt.figure(figsize=(12, 8))
plt.title(f"frame {frame_idx} (original frame {frame_no})")
plt.imshow(Image.open(os.path.join(video_dir, frame_names[frame_idx])))

In [None]:
# Create the predictor's inference state for the reordered frame directory,
# then reset it so no prompts from an earlier run remain.
inference_state = predictor.init_state(video_path=video_dir)
predictor.reset_state(inference_state)

### Masking the image object where object is detected in frame with coordinates

In [None]:
# Manually chosen prompt point (pixel coordinates) on the annotated frame.
# These overwrite any x_center / y_center left by the YOLO detection cells.
x_center= 1050
y_center = 650

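### If you are using the YOLO model, skip the cell above so that `x_center` and `y_center` keep the detected values

### (that is, keep `x_center = x_center` and `y_center = y_center`)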

In [None]:
# Frame 0 of the reordered directory is the frame the prompt is placed on.
ann_frame_idx = 0  # the frame index we interact with
ann_obj_id = 1  # give a unique id to each object we interact with (it can be any integers)
# `x` and `y` are also read as globals by the mask-propagation cells below.
x = x_center
y = y_center

# A single positive click (label 1) at the chosen point
points = np.array([[x,y]], dtype=np.float32)
labels = np.array([1], np.int32)
_, out_obj_ids, out_mask_logits = predictor.add_new_points(
    inference_state=inference_state,
    frame_idx=ann_frame_idx,
    obj_id=ann_obj_id,
    points=points,
    labels=labels,
)

# Show the frame with the click and the resulting mask (logits > 0) on top
plt.figure(figsize=(12, 8))
plt.title(f"frame {ann_frame_idx}")
plt.imshow(Image.open(os.path.join(video_dir, frame_names[ann_frame_idx])))
show_points(points, labels, plt.gca())
show_mask((out_mask_logits[0] > 0.0).cpu().numpy(), plt.gca(), obj_id=out_obj_ids[0])

### Note: provide additional points if object not detected properly

### in the format
#### points = np.array([[x,y],[x1,y1],[x2,y2]], dtype=np.float32)
#### labels = np.array([1,1,1], np.int32)

#### In `labels`, 1 marks a point to include in the mask and 0 marks a point to exclude

In [None]:
def count_files_in_folder(folder_path):
    """
    Return how many regular files sit directly inside a folder.

    Sub-directories are not counted and are not descended into.

    Args:
    - folder_path (str): Path to the folder.

    Returns:
    - int: Number of files in the folder.
    """
    file_total = 0
    for entry in os.listdir(folder_path):
        if os.path.isfile(os.path.join(folder_path, entry)):
            file_total += 1
    return file_total

# Sanity check: count the frames present in the reordered directory
folder_path = "/kaggle/working/reordered_frames"  # Replace with your actual folder path
num_files = count_files_in_folder(folder_path)
print(f"Number of files in the folder: {num_files}")


## Mask generation
### Propagating into Video with reordered Frames

### If additional points were provided above, also change `points` and `labels` in the code below

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import shutil  # Importing shutil to remove directories

def apply_mask_to_image(frame, mask):
    """
    Apply a mask to an image frame, setting non-mask areas to zero.

    Args:
        frame: Image array of shape (H, W) or (H, W, C).
        mask: Array whose last two axes are the mask height and width. Extra
            leading axes are allowed; only the first plane is used. Non-zero
            means "keep".

    Returns:
        Array with the shape and dtype of `frame`, zeroed where the mask is false.

    Raises:
        ValueError: If `mask` has fewer than two dimensions.
    """
    mask_arr = np.asarray(mask)
    if mask_arr.ndim < 2:
        raise ValueError("mask must have at least two dimensions (height, width)")
    # Collapse leading axes, e.g. (1, H, W) -> (H, W), keeping the first plane
    mask_2d = mask_arr.reshape(-1, *mask_arr.shape[-2:])[0].astype(bool)

    h, w = frame.shape[:2]
    mask_h, mask_w = mask_2d.shape
    if (mask_h, mask_w) != (h, w):
        # np.resize would tile/truncate the flattened data, which scrambles
        # the mask; do a nearest-neighbour resize by index lookup instead.
        rows = np.arange(h) * mask_h // h
        cols = np.arange(w) * mask_w // w
        mask_2d = mask_2d[rows[:, None], cols]

    # Add a channel axis for colour frames so the mask broadcasts over any
    # number of channels
    keep = mask_2d if frame.ndim == 2 else mask_2d[:, :, np.newaxis]
    return frame * keep.astype(frame.dtype)

def show_mask(mask, ax, obj_id=None, random_color=False):
    """Draw `mask` on `ax` as a translucent single-colour overlay.

    Same helper as the one defined in the earlier preview cell; this cell
    defines it again. The colour is random when `random_color` is true,
    otherwise the "tab10" entry for `obj_id` (entry 0 when None). Alpha is 0.6.
    """
    if not random_color:
        palette = plt.get_cmap("tab10")
        palette_idx = obj_id if obj_id is not None else 0
        color = np.array([*palette(palette_idx)[:3], 0.6])
    else:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    height, width = mask.shape[-2:]
    # Broadcast (H, W, 1) against (1, 1, 4) to get an RGBA overlay
    overlay = mask.reshape(height, width, 1) * color.reshape(1, 1, -1)
    ax.imshow(overlay)

def show_points(coords, labels, ax, marker_size=200):
    """Plot prompt points on `ax` as stars: label 1 in green, label 0 in red.

    Same helper as the one defined in the earlier preview cell; this cell
    defines it again. `coords` rows are (x, y) pairs.
    """
    # Positive points are drawn first, then negative ones
    for label_value, point_color in ((1, 'green'), (0, 'red')):
        selected = coords[labels == label_value]
        ax.scatter(selected[:, 0], selected[:, 1], color=point_color, marker='*',
                   s=marker_size, edgecolor='white', linewidth=1.25)

# `video_dir` a directory of JPEG frames with filenames like `<frame_index>.jpg`
video_dir = "/kaggle/working/reordered_frames"

# Scan all the JPEG frame names in this directory
frame_names = [
    p for p in os.listdir(video_dir)
    if os.path.splitext(p)[-1] in [".jpg", ".jpeg", ".JPG", ".JPEG"]
]
frame_names.sort(key=lambda p: int(os.path.splitext(p)[0]))

# Create a fresh inference state for the reordered frames
inference_state = predictor.init_state(video_path=video_dir)

# Reset the predictor state
predictor.reset_state(inference_state)

# Frame and object IDs
ann_frame_idx = 0  # frames are reordered
ann_obj_id = 1  # Give a unique ID to each object we interact with (can be any integer)

# Single positive click at the global (x, y) set in the earlier prompt cell.
# If extra points were added there, repeat them here.
points = np.array([[x,y]], dtype=np.float32)
labels = np.array([1], np.int32)  # 1 means positive click, 0 means negative click
_, out_obj_ids, out_mask_logits = predictor.add_new_points(
    inference_state=inference_state,
    frame_idx=ann_frame_idx,
    obj_id=ann_obj_id,
    points=points,
    labels=labels,
)

# Run propagation throughout the video and collect the results in a dict
video_segments = {}  # video_segments contains the per-frame segmentation results
for out_frame_idx, out_obj_ids, out_mask_logits in predictor.propagate_in_video(inference_state):
    # Map each object id to its boolean mask (logits thresholded at 0)
    video_segments[out_frame_idx] = {
        out_obj_id: (out_mask_logits[i] > 0.0).cpu().numpy()
        for i, out_obj_id in enumerate(out_obj_ids)
    }

# Create an output directory for images
output_dir = '/kaggle/working/mask_segmentation_images'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
else:
    # If the directory exists, clear its contents
    for filename in os.listdir(output_dir):
        file_path = os.path.join(output_dir, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print(f"Failed to delete {file_path}. Reason: {e}")

# Render and save masked images; a stride of 1 processes every frame.
# NOTE(review): the restore step later maps files by their position in the
# sorted list, so it presumably relies on this stride staying 1 — confirm.
vis_frame_stride = 1
plt.close("all")
for out_frame_idx in range(0, len(frame_names), vis_frame_stride):
    frame = np.array(Image.open(os.path.join(video_dir, frame_names[out_frame_idx])))
    masked_frame = frame.copy()  # Create a copy of the frame for modification
    # Masks are applied one after another to the same frame
    for out_obj_id, out_mask in video_segments[out_frame_idx].items():
        masked_frame = apply_mask_to_image(masked_frame, out_mask)

    # Convert masked frame to Image object for saving
    masked_image = Image.fromarray(masked_frame.astype('uint8'))
    masked_image.save(os.path.join(output_dir, f'frame_{out_frame_idx}.png'))

    # Optional: Display the masked frame
#     plt.figure(figsize=(6, 4))
#     plt.title(f"frame {out_frame_idx}")
#     plt.imshow(masked_frame)
#     plt.show()


### The masked frames can also be displayed by uncommenting the last four lines of the cell above

## restore Original order of the video frames

### this will restore the original order of the frames

In [None]:
import os
import shutil

def restore_original_order(video_dir, ann_frame_idx, output_dir):
    """
    Undo the frame rotation and copy the frames into a new directory.

    The i-th file in numeric order is treated as reordered frame i and copied
    to `frame_<(i + ann_frame_idx) mod N>.png`, where N is the number of files.

    Args:
    - video_dir (str): Directory containing `frame_<n>.png` files in reordered order.
    - ann_frame_idx (int): The frame index used to start the reordering.
      Must satisfy 0 <= ann_frame_idx <= number of frames.
    - output_dir (str): Directory to save the restored frames; deleted and recreated.

    Raises:
    - ValueError: If `ann_frame_idx` is out of range, or `output_dir` is the
      same directory as `video_dir`.
    """
    # Wiping the destination would delete the source frames
    if os.path.abspath(video_dir) == os.path.abspath(output_dir):
        raise ValueError("output_dir must differ from video_dir")

    # Collect the reordered frames and sort them by the number in the filename
    frame_names = [
        p for p in os.listdir(video_dir)
        if p.endswith(".png") and p.startswith("frame_")
    ]
    frame_names.sort(key=lambda p: int(p.split('_')[-1].split('.')[0]))
    total_frames = len(frame_names)

    # Validate before touching the output directory; an out-of-range offset
    # would otherwise silently write frames under the wrong indices.
    if not 0 <= ann_frame_idx <= total_frames:
        raise ValueError(
            f"ann_frame_idx={ann_frame_idx} is outside the valid range 0..{total_frames}"
        )

    # Ensure the output directory is clean
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    # Inverse of the rotation: reordered index i -> original (i + offset) mod N
    for i, old_name in enumerate(frame_names):
        original_idx = (i + ann_frame_idx) % total_frames
        new_name = f"frame_{original_idx:03d}.png"
        shutil.copy2(os.path.join(video_dir, old_name), os.path.join(output_dir, new_name))

    print(f"Frames restored to original order and saved to {output_dir} successfully.")

# Restore the masked frames to their original order.
video_dir = "/kaggle/working/mask_segmentation_images"  # Replace with your original frames directory
# Use the same offset that was given to reorder_frames. A hard-coded 0 would
# leave the frames in rotated order whenever frame_number is not 0.
ann_frame_idx = frame_number
output_dir = "/kaggle/working/restored_frames"  # Replace with your desired output folder path
restore_original_order(video_dir, ann_frame_idx, output_dir)


## converting mask Frames back to video

In [None]:
import cv2
import os

def frames_to_video(frames_folder, output_video_path, fps=30):
    """Encode the `.png` frames of a folder into an mp4 video.

    Frames are ordered by the integer after the last underscore in the file
    name. The size of the first frame sets the video size. Problems are
    reported with a printed message and the function returns without raising.

    Args:
        frames_folder: Directory containing the `.png` frames.
        output_video_path: Path of the video to write; an existing file is removed.
        fps: Frame rate of the output video.
    """
    # Check if the output video file already exists and delete it
    if os.path.exists(output_video_path):
        try:
            os.remove(output_video_path)
            print(f"Existing file {output_video_path} removed.")
        except Exception as e:
            print(f"Failed to remove {output_video_path}. Reason: {e}")
            return

    # Get a list of frame files and sort them by frame number
    frame_files = [f for f in os.listdir(frames_folder) if f.endswith('.png')]
    frame_files.sort(key=lambda f: int(f.split('_')[-1].split('.')[0]))

    # Check if there are any frames to process
    if not frame_files:
        print("No frames found in the specified folder.")
        return

    # Read the first frame to get the dimensions
    first_frame_path = os.path.join(frames_folder, frame_files[0])
    first_frame = cv2.imread(first_frame_path)
    if first_frame is None:
        print(f"Failed to read the first frame at {first_frame_path}")
        return
    height, width = first_frame.shape[:2]

    # Initialize the video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for mp4 format
    video_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    # A writer that failed to open would otherwise accept frames and write nothing
    if not video_writer.isOpened():
        video_writer.release()
        print(f"Failed to open video writer for {output_video_path}")
        return

    try:
        # Write each frame to the video
        for frame_file in frame_files:
            frame_path = os.path.join(frames_folder, frame_file)
            frame = cv2.imread(frame_path)
            if frame is None:
                print(f"Failed to read frame at {frame_path}")
                continue
            # The writer expects every frame at the size it was created with
            if frame.shape[:2] != (height, width):
                print(f"Resizing frame {frame_path} to {width}x{height}")
                frame = cv2.resize(frame, (width, height))
            video_writer.write(frame)
    finally:
        # Release the writer even if a frame fails part-way
        video_writer.release()
    print(f"Video saved to {output_video_path}")

# Encode the restored masked frames into a video.
# NOTE(review): fps is fixed at 30 and is not read from the source video —
# confirm it matches the original frame rate.
frames_folder = r'/kaggle/working/restored_frames'  # Replace with the folder containing your frames
output_video_path = r"/kaggle/working/mask_output_video.mp4"  # Desired output video file path

frames_to_video(frames_folder, output_video_path, fps=30)


## Inverse Mask Generation

### similarly in case of additional points make changes here also

In [None]:
def clear_output_directory(directory):
    """
    Delete the regular files directly inside `directory`.

    Does nothing when the directory does not exist. Sub-directories are left
    in place. A failed deletion is printed, not raised.
    """
    if not os.path.exists(directory):
        return
    for entry in os.listdir(directory):
        file_path = os.path.join(directory, entry)
        try:
            if not os.path.isfile(file_path):
                continue
            os.unlink(file_path)
        except Exception as e:
            print(f"Failed to delete {file_path}. Reason: {e}")

def apply_inverse_mask_to_image(frame, mask):
    """
    Apply the inverse of a mask to an image frame, setting mask areas to zero.

    Args:
        frame: Image array of shape (H, W) or (H, W, C).
        mask: Array whose last two axes are the mask height and width. Extra
            leading axes are allowed; only the first plane is used. Non-zero
            marks the area to blank out.

    Returns:
        Array with the shape and dtype of `frame`, zeroed where the mask is true.

    Raises:
        ValueError: If `mask` has fewer than two dimensions.
    """
    mask_arr = np.asarray(mask)
    if mask_arr.ndim < 2:
        raise ValueError("mask must have at least two dimensions (height, width)")
    # Collapse leading axes, e.g. (1, H, W) -> (H, W), keeping the first plane
    mask_2d = mask_arr.reshape(-1, *mask_arr.shape[-2:])[0].astype(bool)

    h, w = frame.shape[:2]
    mask_h, mask_w = mask_2d.shape
    if (mask_h, mask_w) != (h, w):
        # np.resize would tile/truncate the flattened data, which scrambles
        # the mask; do a nearest-neighbour resize by index lookup instead.
        rows = np.arange(h) * mask_h // h
        cols = np.arange(w) * mask_w // w
        mask_2d = mask_2d[rows[:, None], cols]

    inverse_mask = ~mask_2d  # keep everything outside the object
    # Add a channel axis for colour frames so the mask broadcasts over any
    # number of channels
    keep = inverse_mask if frame.ndim == 2 else inverse_mask[:, :, np.newaxis]
    return frame * keep.astype(frame.dtype)

def save_masked_image(masked_frame, out_frame_idx, output_dir):
    """
    Write a masked frame to `output_dir` as `frame_<out_frame_idx>.png`.

    The array is cast to uint8 before it is handed to PIL.
    """
    target_path = os.path.join(output_dir, f'frame_{out_frame_idx}.png')
    Image.fromarray(masked_frame.astype('uint8')).save(target_path)

def show_mask(mask, ax, obj_id=None, random_color=False):
    """Draw `mask` on `ax` as a translucent single-colour overlay.

    Same helper as the one defined in earlier cells; this cell defines it
    again. The colour is random when `random_color` is true, otherwise the
    "tab10" entry for `obj_id` (entry 0 when None). Alpha is 0.6.
    """
    if not random_color:
        palette = plt.get_cmap("tab10")
        palette_idx = obj_id if obj_id is not None else 0
        color = np.array([*palette(palette_idx)[:3], 0.6])
    else:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    height, width = mask.shape[-2:]
    # Broadcast (H, W, 1) against (1, 1, 4) to get an RGBA overlay
    overlay = mask.reshape(height, width, 1) * color.reshape(1, 1, -1)
    ax.imshow(overlay)

def show_points(coords, labels, ax, marker_size=200):
    """Plot prompt points on `ax` as stars: label 1 in green, label 0 in red.

    Same helper as the one defined in earlier cells; this cell defines it
    again. `coords` rows are (x, y) pairs.
    """
    # Positive points are drawn first, then negative ones
    for label_value, point_color in ((1, 'green'), (0, 'red')):
        selected = coords[labels == label_value]
        ax.scatter(selected[:, 0], selected[:, 1], color=point_color, marker='*',
                   s=marker_size, edgecolor='white', linewidth=1.25)

# `video_dir` a directory of JPEG frames with filenames like `<frame_index>.jpg`
video_dir = "/kaggle/working/reordered_frames"

# Scan all the JPEG frame names in this directory
frame_names = [
    p for p in os.listdir(video_dir)
    if os.path.splitext(p)[-1] in [".jpg", ".jpeg", ".JPG", ".JPEG"]
]
frame_names.sort(key=lambda p: int(os.path.splitext(p)[0]))

# Create a fresh inference state for the reordered frames
inference_state = predictor.init_state(video_path=video_dir)

# Reset the predictor state
predictor.reset_state(inference_state)

# Frame and object IDs
ann_frame_idx = 0  # The frame index we interact with
ann_obj_id = 1  # Give a unique ID to each object we interact with (can be any integer)

# Single positive click at the global (x, y) set in the earlier prompt cell.
# If extra points were added there, repeat them here.
points = np.array([[x,y]], dtype=np.float32)
labels = np.array([1], np.int32)  # 1 means positive click, 0 means negative click
_, out_obj_ids, out_mask_logits = predictor.add_new_points(
    inference_state=inference_state,
    frame_idx=ann_frame_idx,
    obj_id=ann_obj_id,
    points=points,
    labels=labels,
)

# Run propagation throughout the video and collect the results in a dict
video_segments = {}  # video_segments contains the per-frame segmentation results
for out_frame_idx, out_obj_ids, out_mask_logits in predictor.propagate_in_video(inference_state):
    # Map each object id to its boolean mask (logits thresholded at 0)
    video_segments[out_frame_idx] = {
        out_obj_id: (out_mask_logits[i] > 0.0).cpu().numpy()
        for i, out_obj_id in enumerate(out_obj_ids)
    }

# Create an output directory for images
output_dir = '/kaggle/working/inverse_segmentation_images'
os.makedirs(output_dir, exist_ok=True)

# Clear the output directory (files only)
clear_output_directory(output_dir)

# Render and save inverse masked images; a stride of 1 processes every frame
vis_frame_stride = 1
plt.close("all")
for out_frame_idx in range(0, len(frame_names), vis_frame_stride):
    frame = np.array(Image.open(os.path.join(video_dir, frame_names[out_frame_idx])))
    masked_frame = frame.copy()  # Create a copy of the frame for modification
    # Each object's area is blanked out in turn on the same frame
    for out_obj_id, out_mask in video_segments[out_frame_idx].items():
        masked_frame = apply_inverse_mask_to_image(masked_frame, out_mask)

    # Save the inverse masked frame
    save_masked_image(masked_frame, out_frame_idx, output_dir)

    # Optional: Display the inverse masked frame
    # plt.figure(figsize=(6, 4))
    # plt.title(f"frame {out_frame_idx}")
    # plt.imshow(masked_frame)
    # plt.show()


## restoring to original frames of inverse mask

In [None]:
# Restore the inverse-masked frames to their original order.
video_dir = "/kaggle/working/inverse_segmentation_images"  # Replace with your original frames directory
# Use the same offset that was given to reorder_frames. A hard-coded 0 would
# leave the frames in rotated order whenever frame_number is not 0.
ann_frame_idx = frame_number
output_dir = "/kaggle/working/inverse_restored_frames"  # Replace with your desired output folder path
restore_original_order(video_dir, ann_frame_idx, output_dir)

## converting inverse mask frames to video

In [None]:
# Encode the restored inverse-masked frames into a video at 30 fps
frames_folder = r'/kaggle/working/inverse_restored_frames'  # Replace with the folder containing your frames
output_video_path = r"/kaggle/working/inverse_mask_output_video.mp4"  # Desired output video file path

frames_to_video(frames_folder, output_video_path, fps=30)

# Video mask Pixelation

In [None]:
def pixelate_area(image, mask, pixelation_level):
    """
    Apply pixelation to the masked area of an image.

    The image is tiled into square blocks. In every block that contains masked
    pixels, those masked pixels are replaced by their own mean colour.
    Unmasked pixels are neither averaged in nor overwritten, so blocks on the
    edge of the mask are not darkened by blanked-out background.

    Parameters:
    - image: NumPy array of the image to be pixelated, shape (H, W) or (H, W, C).
    - mask: Boolean NumPy array of shape (H, W) indicating the masked area.
    - pixelation_level: Int >= 1, the size of the blocks used for pixelation.

    Returns:
    - A pixelated copy of `image`; the input is not modified.

    Raises:
    - ValueError: If `pixelation_level` is smaller than 1.
    """
    if pixelation_level < 1:
        raise ValueError("pixelation_level must be at least 1")

    # Create a copy of the image to modify
    pixelated_image = image.copy()

    # Get image dimensions
    h, w = image.shape[:2]

    for top in range(0, h, pixelation_level):
        for left in range(0, w, pixelation_level):
            # Slices are clamped at the array edge automatically
            block = (slice(top, top + pixelation_level), slice(left, left + pixelation_level))
            block_mask = mask[block]

            # Skip blocks that do not touch the masked area
            if not block_mask.any():
                continue

            # Mean over the masked pixels of this block only
            mean_color = image[block][block_mask].mean(axis=0).astype(image.dtype)

            # `pixelated_image[block]` is a view, so this writes in place
            pixelated_image[block][block_mask] = mean_color

    return pixelated_image

def combine_pixelated_mask(masked_image_path, inverse_masked_image_path, save_path, pixelation_level=10):
    """
    Paste a pixelated version of the masked object onto the inverse-masked frame.

    The object area is taken to be every pixel of the masked image that is
    non-zero in at least one of its first three channels.

    Parameters:
    - masked_image_path: String, path to the masked image.
    - inverse_masked_image_path: String, path to the inverse-masked image.
    - save_path: String, path where the combined image will be saved.
    - pixelation_level: Int, the size of the blocks used for pixelation.
    """
    foreground = Image.open(masked_image_path).convert("RGBA")
    background = Image.open(inverse_masked_image_path).convert("RGBA")

    # Bring the background to the foreground's size when they differ
    if foreground.size != background.size:
        background = background.resize(foreground.size)

    masked_array = np.array(foreground)

    # Object pixels: any of the colour channels is above zero
    object_pixels = (masked_array[..., :3] > 0).any(axis=-1)

    pixelated = pixelate_area(masked_array, object_pixels, pixelation_level)

    # Start from the background and overwrite only the object pixels
    combined_array = np.array(background)
    combined_array[object_pixels] = pixelated[object_pixels]

    Image.fromarray(combined_array).save(save_path)
    print(f"Combined image saved as {save_path}")

#     # Display the combined image
#     plt.imshow(combined_image)
#     plt.axis('off')
#     plt.show()

# Directory paths: restored masked frames, restored inverse-masked frames,
# and the destination for the combined result
masked_images_dir = "/kaggle/working/restored_frames"
inverse_images_dir = "/kaggle/working/inverse_restored_frames"
output_dir = "/kaggle/working/pixelated_combined_images"

# Ensure the output directory exists (existing files in it are left in place)
os.makedirs(output_dir, exist_ok=True)

# Get and sort the list of image files by frame number
image_files = [f for f in os.listdir(masked_images_dir) if f.startswith("frame_") and f.endswith(".png")]
image_files.sort(key=lambda f: int(f.split('_')[-1].split('.')[0]))

# Combine each masked frame with the inverse frame of the same file name
for image_name in image_files:
    masked_image_path = os.path.join(masked_images_dir, image_name)
    inverse_image_path = os.path.join(inverse_images_dir, image_name)
    save_path = os.path.join(output_dir, f"pixelated_combined_{image_name}")

    # Check if the corresponding inverse image exists before combining
    if os.path.exists(inverse_image_path):
        combine_pixelated_mask(masked_image_path, inverse_image_path, save_path, pixelation_level=20)
    else:
        print(f"Warning: Missing inverse file for {image_name}. Skipping combination.")


## converting frames of pixels to video

In [None]:
def frames_to_video(frames_folder, output_video_path, fps=30):
    """
    Assemble the PNG frames found in a folder into an MP4 video.

    Frames are ordered by the integer that follows the last underscore in the
    file name (e.g. "frame_12.png" -> 12), so every PNG in the folder must
    end in such a number.

    Parameters:
    - frames_folder: String, directory containing the PNG frames.
    - output_video_path: String, path of the video file to write.
    - fps: Number, frame rate of the output video.

    Returns None. Problems (no frames, unreadable frame, writer that cannot be
    opened) are reported with print() instead of being raised.
    """
    # Get a list of frame files and sort them by frame number
    frame_files = [f for f in os.listdir(frames_folder) if f.endswith('.png')]
    frame_files.sort(key=lambda f: int(f.split('_')[-1].split('.')[0]))

    if not frame_files:
        print("No frame files found in the specified directory.")
        return

    # The first frame defines the video dimensions
    first_frame_path = os.path.join(frames_folder, frame_files[0])
    first_frame = cv2.imread(first_frame_path)
    if first_frame is None:
        print(f"Error reading the first frame: {first_frame_path}")
        return

    height, width = first_frame.shape[:2]

    # Initialize the video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for mp4 format
    video_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    if not video_writer.isOpened():
        # Without this check a bad path or missing codec would go unnoticed
        print(f"Error opening video writer for: {output_video_path}")
        video_writer.release()
        return

    try:
        for frame_file in frame_files:
            frame_path = os.path.join(frames_folder, frame_file)
            frame = cv2.imread(frame_path)
            if frame is None:
                print(f"Error reading frame: {frame_path}")
                continue

            # The writer was opened for (width, height); bring stray frames to that size
            if frame.shape[:2] != (height, width):
                frame = cv2.resize(frame, (width, height))

            video_writer.write(frame)
    finally:
        # Always finalize the file, even if reading or writing a frame raised
        video_writer.release()

    print(f"Video saved to {output_video_path}")

# Turn the pixelated frames produced above into a video
frames_folder = '/kaggle/working/pixelated_combined_images'  # directory that holds the frames
output_video_path = "/kaggle/working/pixelated_combined_images_output_video.mp4"  # video file to create

frames_to_video(frames_folder=frames_folder, output_video_path=output_video_path, fps=30)


## side by side video of original with pixelated video.

In [None]:
from PIL import Image
import os
import subprocess
import shutil

# Input frame directories and output locations for the side-by-side comparison
dir1 = '/kaggle/working/output_frames'  # .jpg frames, pasted on the left
dir2 = '/kaggle/working/pixelated_combined_images'  # .png frames, pasted on the right
output_dir = '/kaggle/working/combined_frames_pix'
video_output = '/kaggle/working/pix_output_video.mp4'

# Start from an empty output directory so stale frames never end up in the video
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)

# Remove the previous video if it exists
if os.path.exists(video_output):
    os.remove(video_output)


def _frame_sort_key(filename):
    """Sort by the number after the last underscore; other names sort last, alphabetically."""
    index_text = os.path.splitext(filename)[0].rsplit('_', 1)[-1]
    if index_text.isdecimal():
        return (0, int(index_text), filename)
    return (1, 0, filename)


# Numeric ordering keeps frame_10 after frame_9 (plain sorted() would not)
frames1 = sorted((f for f in os.listdir(dir1) if f.endswith('.jpg')), key=_frame_sort_key)
frames2 = sorted((f for f in os.listdir(dir2) if f.endswith('.png')), key=_frame_sort_key)

if len(frames1) != len(frames2):
    print(f"Warning: {len(frames1)} frames in {dir1} but {len(frames2)} in {dir2}; unpaired frames are ignored.")

# Pair the frames up and paste each pair next to each other
for idx, (f1, f2) in enumerate(zip(frames1, frames2), start=1):
    # Context managers close the image files as soon as the pair is processed
    with Image.open(os.path.join(dir1, f1)) as img1, Image.open(os.path.join(dir2, f2)) as img2:
        # Assuming both images have the same height, concatenate side by side
        combined_img = Image.new('RGB', (img1.width + img2.width, img1.height))
        combined_img.paste(img1, (0, 0))
        combined_img.paste(img2, (img1.width, 0))

    # Save combined image with a sequential name like combined_frame_001.png
    combined_img.save(os.path.join(output_dir, f"combined_frame_{idx:03d}.png"))

print(f"Frames combined and saved in {output_dir}")

# List the files in the output directory to verify they exist
print("Files in output directory:", os.listdir(output_dir))

# Convert the combined frames into a video using ffmpeg
ffmpeg_result = subprocess.run([
    'ffmpeg', '-framerate', '30', '-i',
    f'{output_dir}/combined_frame_%03d.png', '-c:v',
    'libx264', '-pix_fmt', 'yuv420p', video_output
])

# Only claim success when ffmpeg actually succeeded
if ffmpeg_result.returncode == 0:
    print(f"Video saved as {video_output}")
else:
    print(f"ffmpeg failed with exit code {ffmpeg_result.returncode}; video was not saved.")


# Masked area Hue change in video

In [None]:
import matplotlib.colors as mcolors

def change_hue(image, mask, hue_shift):
    """
    Change the hue of the masked area in an image.

    Parameters:
    - image: NumPy array of the image to be modified (RGB, values 0-255).
    - mask: Boolean NumPy array indicating the masked area.
    - hue_shift: Float, amount to shift the hue (0 to 1 for a complete cycle).

    Returns a new uint8 RGB array; the input image is not modified.
    """
    # Convert the image to float in the range [0, 1]
    float_image = image.astype('float32') / 255.0

    # Convert to HSV
    hsv_image = mcolors.rgb_to_hsv(float_image)

    # Shift the hue channel inside the mask, wrapping around at 1.0
    hsv_image[..., 0][mask] = (hsv_image[..., 0][mask] + hue_shift) % 1.0

    # Convert back to RGB
    modified_float_image = mcolors.hsv_to_rgb(hsv_image)

    # Round to the nearest level before casting: a plain astype() truncates,
    # which can darken pixels by one level after the float round trip
    modified_image = np.clip(np.rint(modified_float_image * 255.0), 0, 255).astype('uint8')

    return modified_image

def combine_hue_modified_mask(masked_image_path, inverse_masked_image_path, save_path, hue_shift=0.1):
    """
    Shift the hue of the object in the masked frame and paste it onto the inverse-masked frame.

    Parameters:
    - masked_image_path: String, path to the masked image.
    - inverse_masked_image_path: String, path to the inverse-masked image.
    - save_path: String, path where the combined image will be saved.
    - hue_shift: Float, amount to shift the hue (0 to 1 for a complete cycle).
    """
    # Load both frames as RGBA; the inverse frame is resized to the masked frame if needed
    foreground = Image.open(masked_image_path).convert("RGBA")
    background = Image.open(inverse_masked_image_path).convert("RGBA")
    if foreground.size != background.size:
        background = background.resize(foreground.size)

    fg_pixels = np.array(foreground)
    rgb = fg_pixels[..., :3]
    alpha = fg_pixels[..., 3]

    # A pixel belongs to the object when any of its colour channels is non-zero
    mask = (rgb > 0).any(axis=-1)

    # Recolour the object and re-attach the masked frame's alpha channel
    recoloured = np.dstack((change_hue(rgb, mask, hue_shift), alpha))

    # Overwrite the object pixels of the background frame with the recoloured ones
    result = np.array(background)
    result[mask] = recoloured[mask]

    Image.fromarray(result).save(save_path)
    print(f"Combined image saved as {save_path}")

#     # Display the combined image
#     plt.imshow(combined_image)
#     plt.axis('off')
#     plt.show()

# Where the object-only frames and background-only frames live, and where results go
masked_images_dir = "/kaggle/working/restored_frames"
inverse_images_dir = "/kaggle/working/inverse_restored_frames"
output_dir = "/kaggle/working/hue_combined_images"

# Create the output directory on first run
os.makedirs(output_dir, exist_ok=True)

# Collect the frame files, ordered by the number after the last underscore
image_files = sorted(
    (f for f in os.listdir(masked_images_dir) if f.startswith("frame_") and f.endswith(".png")),
    key=lambda f: int(f.split('_')[-1].split('.')[0]),
)

# Recolour the object in every frame that has a matching background frame
for image_name in image_files:
    masked_image_path = os.path.join(masked_images_dir, image_name)
    inverse_image_path = os.path.join(inverse_images_dir, image_name)
    save_path = os.path.join(output_dir, f"hue_modified_combined_{image_name}")

    # Without the background frame there is nothing to merge into
    if not os.path.exists(inverse_image_path):
        print(f"Warning: Missing inverse file for {image_name}. Skipping combination.")
        continue

    combine_hue_modified_mask(masked_image_path, inverse_image_path, save_path, hue_shift=0.25)


## converting back hue change to video

In [None]:
# Turn the hue-shifted frames produced above into a video
frames_folder = '/kaggle/working/hue_combined_images'  # directory that holds the frames
output_video_path = "/kaggle/working/hue_combined_images_output_video.mp4"  # video file to create

frames_to_video(frames_folder=frames_folder, output_video_path=output_video_path, fps=30)

## side by side video of original with Hue video.

In [None]:
from PIL import Image
import os
import subprocess
import shutil

# Input frame directories and output locations for the side-by-side comparison
dir1 = '/kaggle/working/output_frames'  # .jpg frames, pasted on the left
dir2 = '/kaggle/working/hue_combined_images'  # .png frames, pasted on the right
output_dir = '/kaggle/working/hue_with_og_combined_frames'
video_output = '/kaggle/working/hue_with_og_output_video.mp4'

# Start from an empty output directory so stale frames never end up in the video
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)

# Remove the previous video if it exists
if os.path.exists(video_output):
    os.remove(video_output)


def _frame_sort_key(filename):
    """Sort by the number after the last underscore; other names sort last, alphabetically."""
    index_text = os.path.splitext(filename)[0].rsplit('_', 1)[-1]
    if index_text.isdecimal():
        return (0, int(index_text), filename)
    return (1, 0, filename)


# Numeric ordering keeps frame_10 after frame_9 (plain sorted() would not)
frames1 = sorted((f for f in os.listdir(dir1) if f.endswith('.jpg')), key=_frame_sort_key)
frames2 = sorted((f for f in os.listdir(dir2) if f.endswith('.png')), key=_frame_sort_key)

if len(frames1) != len(frames2):
    print(f"Warning: {len(frames1)} frames in {dir1} but {len(frames2)} in {dir2}; unpaired frames are ignored.")

# Pair the frames up and paste each pair next to each other
for idx, (f1, f2) in enumerate(zip(frames1, frames2), start=1):
    # Context managers close the image files as soon as the pair is processed
    with Image.open(os.path.join(dir1, f1)) as img1, Image.open(os.path.join(dir2, f2)) as img2:
        # Assuming both images have the same height, concatenate side by side
        combined_img = Image.new('RGB', (img1.width + img2.width, img1.height))
        combined_img.paste(img1, (0, 0))
        combined_img.paste(img2, (img1.width, 0))

    # Save combined image with a sequential name like combined_frame_001.png
    combined_img.save(os.path.join(output_dir, f"combined_frame_{idx:03d}.png"))

print(f"Frames combined and saved in {output_dir}")

# List the files in the output directory to verify they exist
print("Files in output directory:", os.listdir(output_dir))

# Convert the combined frames into a video using ffmpeg
ffmpeg_result = subprocess.run([
    'ffmpeg', '-framerate', '30', '-i',
    f'{output_dir}/combined_frame_%03d.png', '-c:v',
    'libx264', '-pix_fmt', 'yuv420p', video_output
])

# Only claim success when ffmpeg actually succeeded
if ffmpeg_result.returncode == 0:
    print(f"Video saved as {video_output}")
else:
    print(f"ffmpeg failed with exit code {ffmpeg_result.returncode}; video was not saved.")


# Mask replacement with another video

### Path to the replacement video is required (set `replacement_video_path` below)

In [None]:
import os
import numpy as np
from PIL import Image
import cv2

def replace_area_with_frames(image, mask, replacement_frames, frame_idx):
    """
    Replace the masked area of an image with a different video frame.

    Parameters:
    - image: NumPy array of the image to modify.
    - mask: Boolean NumPy array indicating the masked area.
    - replacement_frames: List of NumPy arrays, each representing a video frame to use as a replacement.
    - frame_idx: Int, the index of the current frame in the replacement sequence.

    Returns a modified copy of the image; the input image is left untouched.

    Raises:
    - IndexError: if replacement_frames is empty.
    """
    # Fail with an explicit message instead of an opaque "list index out of range"
    if len(replacement_frames) == 0:
        raise IndexError("replacement_frames is empty; no replacement video frames were loaded")

    # Create a copy of the image to modify
    modified_image = image.copy()

    # Get the replacement frame, use the last one if index exceeds available frames
    replacement_frame = replacement_frames[min(frame_idx, len(replacement_frames) - 1)]

    # Resize the replacement frame to the image size; skipped when it already matches
    target_size = (image.shape[1], image.shape[0])  # cv2.resize takes (width, height)
    if (replacement_frame.shape[1], replacement_frame.shape[0]) != target_size:
        replacement_frame = cv2.resize(replacement_frame, target_size)

    # Replace the masked area with the replacement frame
    modified_image[mask] = replacement_frame[mask]

    return modified_image

def combine_mask_with_frames(masked_image_path, inverse_masked_image_path, replacement_frames, save_path, frame_idx):
    """
    Fill the object region of a masked frame with replacement-video content and
    paste the result onto the inverse-masked frame.

    Parameters:
    - masked_image_path: String, path to the masked image.
    - inverse_masked_image_path: String, path to the inverse-masked image.
    - replacement_frames: List of NumPy arrays, each representing a video frame to use as a replacement.
    - save_path: String, path where the combined image will be saved.
    - frame_idx: Int, the index of the current frame in the replacement sequence.
    """
    # Load both frames as RGBA; the inverse frame is resized to the masked frame if needed
    foreground = Image.open(masked_image_path).convert("RGBA")
    background = Image.open(inverse_masked_image_path).convert("RGBA")
    if foreground.size != background.size:
        background = background.resize(foreground.size)

    fg_pixels = np.array(foreground)

    # A pixel belongs to the object when any of its colour channels is non-zero
    object_mask = (fg_pixels[..., :3] > 0).any(axis=-1)

    # Swap the object pixels for pixels of the replacement video frame
    filled = replace_area_with_frames(fg_pixels, object_mask, replacement_frames, frame_idx)

    # Overwrite the object pixels of the background frame with the filled ones
    result = np.array(background)
    result[object_mask] = filled[object_mask]

    Image.fromarray(result).save(save_path)
    print(f"Combined image saved as {save_path}")

# Directory paths
masked_images_dir = "/kaggle/working/restored_frames"
inverse_images_dir = "/kaggle/working/inverse_restored_frames"
output_dir = "/kaggle/working/mask_replaced_combined_images"
replacement_video_path = "/kaggle/input/viedo-with-replacementy/Untitled video - Made with Clipchamp (1).mp4" # input replacement video link

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Load the replacement video frames (converted from OpenCV's BGR to RGBA)
replacement_frames = []
cap = cv2.VideoCapture(replacement_video_path)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        replacement_frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGBA))
finally:
    # Release the capture even if a frame conversion fails
    cap.release()

# Stop here with a clear message rather than failing later on the first frame
if not replacement_frames:
    raise RuntimeError(f"No frames could be read from replacement video: {replacement_video_path}")

# Get and sort the list of image files
image_files = [f for f in os.listdir(masked_images_dir) if f.startswith("frame_") and f.endswith(".png")]
image_files.sort(key=lambda f: int(f.split('_')[-1].split('.')[0]))

# Iterate over the sorted files; frame_idx selects the replacement frame for each one
for frame_idx, image_name in enumerate(image_files):
    masked_image_path = os.path.join(masked_images_dir, image_name)
    inverse_image_path = os.path.join(inverse_images_dir, image_name)
    save_path = os.path.join(output_dir, f"frame_combined_{image_name}")

    # Check if the corresponding inverse image exists before combining
    if os.path.exists(inverse_image_path):
        combine_mask_with_frames(masked_image_path, inverse_image_path, replacement_frames, save_path, frame_idx)
    else:
        print(f"Warning: Missing inverse file for {image_name}. Skipping combination.")


### replaced mask to video 

In [None]:
# Turn the mask-replaced frames produced above into a video
frames_folder = '/kaggle/working/mask_replaced_combined_images'  # directory that holds the frames
output_video_path = "/kaggle/working/mask_replaced_combined_images_output_video.mp4"  # video file to create

frames_to_video(frames_folder=frames_folder, output_video_path=output_video_path, fps=30)

## side by side video of original with mask replaced video.

In [None]:
from PIL import Image
import os
import subprocess
import shutil

# Input frame directories and output locations for the side-by-side comparison
dir1 = '/kaggle/working/output_frames'  # .jpg frames, pasted on the left
dir2 = '/kaggle/working/mask_replaced_combined_images'  # .png frames, pasted on the right
output_dir = '/kaggle/working/mask_replacement_with_orginal_combined_frames'
video_output = '/kaggle/working/mask_replacement_with_orginal_output_video.mp4'

# Start from an empty output directory so stale frames never end up in the video
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)

# Remove the previous video if it exists
if os.path.exists(video_output):
    os.remove(video_output)


def _frame_sort_key(filename):
    """Sort by the number after the last underscore; other names sort last, alphabetically."""
    index_text = os.path.splitext(filename)[0].rsplit('_', 1)[-1]
    if index_text.isdecimal():
        return (0, int(index_text), filename)
    return (1, 0, filename)


# Numeric ordering keeps frame_10 after frame_9 (plain sorted() would not)
frames1 = sorted((f for f in os.listdir(dir1) if f.endswith('.jpg')), key=_frame_sort_key)
frames2 = sorted((f for f in os.listdir(dir2) if f.endswith('.png')), key=_frame_sort_key)

if len(frames1) != len(frames2):
    print(f"Warning: {len(frames1)} frames in {dir1} but {len(frames2)} in {dir2}; unpaired frames are ignored.")

# Pair the frames up and paste each pair next to each other
for idx, (f1, f2) in enumerate(zip(frames1, frames2), start=1):
    # Context managers close the image files as soon as the pair is processed
    with Image.open(os.path.join(dir1, f1)) as img1, Image.open(os.path.join(dir2, f2)) as img2:
        # Assuming both images have the same height, concatenate side by side
        combined_img = Image.new('RGB', (img1.width + img2.width, img1.height))
        combined_img.paste(img1, (0, 0))
        combined_img.paste(img2, (img1.width, 0))

    # Save combined image with a sequential name like combined_frame_001.png
    combined_img.save(os.path.join(output_dir, f"combined_frame_{idx:03d}.png"))

print(f"Frames combined and saved in {output_dir}")

# List the files in the output directory to verify they exist
print("Files in output directory:", os.listdir(output_dir))

# Convert the combined frames into a video using ffmpeg
ffmpeg_result = subprocess.run([
    'ffmpeg', '-framerate', '30', '-i',
    f'{output_dir}/combined_frame_%03d.png', '-c:v',
    'libx264', '-pix_fmt', 'yuv420p', video_output
])

# Only claim success when ffmpeg actually succeeded
if ffmpeg_result.returncode == 0:
    print(f"Video saved as {video_output}")
else:
    print(f"ffmpeg failed with exit code {ffmpeg_result.returncode}; video was not saved.")


# Masked area glow effect in video

In [None]:
from PIL import Image, ImageFilter

def apply_blur_to_masked_area(image, mask, blur_radius=10):
    """
    Paint a soft-edged "glow" over the masked area of an image.

    The mask itself (not the image content) is Gaussian-blurred, and the
    blurred mask values are written into the image wherever the mask is set,
    so the masked region becomes a light silhouette that fades near its edges.

    Parameters:
    - image: PIL Image object of the original image; must have an alpha channel (RGBA).
    - mask: Boolean NumPy array indicating the masked area.
    - blur_radius: Integer, the radius of the Gaussian blur applied to the mask.

    Returns a new RGBA PIL Image carrying the alpha channel of the input image.
    """
    # Convert image to numpy array
    image_array = np.array(image)

    # Render the boolean mask as a grayscale image (0 outside, 255 inside)
    mask_image = Image.fromarray((mask * 255).astype('uint8'))

    # Apply a Gaussian blur to the mask image
    blurred_mask_image = mask_image.filter(ImageFilter.GaussianBlur(radius=blur_radius))

    # Expand the blurred mask to three channels so it can be mixed with the RGB data
    blurred_mask_array = np.array(blurred_mask_image.convert('RGB'))

    # Keep the blurred mask values only inside the original mask
    blurred_area = np.zeros_like(image_array[..., :3])
    blurred_area[mask] = blurred_mask_array[mask]

    # Wherever the glow is non-zero it replaces the original pixel
    combined_array = np.where(blurred_area > 0, blurred_area, image_array[..., :3])

    # Preserve the alpha channel from the original image
    alpha_channel = image_array[..., 3]
    combined_image = Image.fromarray(np.dstack((combined_array, alpha_channel)))

    return combined_image

def combine_and_apply_blur(masked_image_path, inverse_masked_image_path, save_path, blur_radius):
    """
    Apply the masked-area effect to the inverse-masked image and save the result.

    Parameters:
    - masked_image_path: String, path to the masked image (used to extract the mask).
    - inverse_masked_image_path: String, path to the inverse-masked image.
    - save_path: String, path where the final image will be saved.
    - blur_radius: Integer, the radius of the Gaussian blur for the blur effect.
    """
    # Open inverse-masked image
    inverse_masked_image = Image.open(inverse_masked_image_path).convert("RGBA")

    # Extract the mask from the masked image: any non-black grayscale pixel counts
    masked_image = Image.open(masked_image_path).convert("L")

    # Ensure images are the same size by resizing the inverse image; otherwise the
    # mask could not be used to index the image
    if masked_image.size != inverse_masked_image.size:
        inverse_masked_image = inverse_masked_image.resize(masked_image.size)

    mask = np.array(masked_image) > 0

    # Apply blur effect to the masked area
    blurred_image = apply_blur_to_masked_area(inverse_masked_image, mask, blur_radius)

    # Save the final image
    blurred_image.save(save_path)
    print(f"Final image with blur effect saved as {save_path}")

#     # Display the final image
#     plt.imshow(blurred_image)
#     plt.axis('off')
#     plt.show()

# Where the object-only frames and background-only frames live, and where results go
masked_images_dir = "/kaggle/working/restored_frames"
inverse_images_dir = "/kaggle/working/inverse_restored_frames"
output_dir = "/kaggle/working/blur_combined_images"

# Create the output directory on first run
os.makedirs(output_dir, exist_ok=True)

# Collect the frame files, ordered by the number after the last underscore
image_files = sorted(
    (f for f in os.listdir(masked_images_dir) if f.startswith("frame_") and f.endswith(".png")),
    key=lambda f: int(f.split('_')[-1].split('.')[0]),
)

# Gaussian blur radius passed to every frame
blur_radius = 10

# Apply the effect to every frame that has a matching background frame
for image_name in image_files:
    masked_image_path = os.path.join(masked_images_dir, image_name)
    inverse_image_path = os.path.join(inverse_images_dir, image_name)
    save_path = os.path.join(output_dir, f"blur_combined_{image_name}")

    # Without the background frame there is nothing to apply the effect to
    if not os.path.exists(inverse_image_path):
        print(f"Warning: Missing inverse file for {image_name}. Skipping combination.")
        continue

    combine_and_apply_blur(masked_image_path, inverse_image_path, save_path, blur_radius)


### converting glow effect frames into video 

In [None]:
# Turn the glow-effect frames produced above into a video
frames_folder = '/kaggle/working/blur_combined_images'  # directory that holds the frames
output_video_path = "/kaggle/working/blur_combined_images_output_video.mp4"  # video file to create

frames_to_video(frames_folder=frames_folder, output_video_path=output_video_path, fps=30)

# Generative AI propagation in video

In [None]:
!pip install stability-sdk

## Single API mask video generation

### The single-API-key code uses fewer Stability AI credits: with the configuration given in the code below it can generate up to ~110 frames using 25 credits.

### To generate an API key, sign up on the Stability AI platform (a new account receives 25 free credits), then copy the API key and supply it to the code below.

#### Note: Because a high number of frames is generated, the quality is significantly poorer with a single API key.

In [None]:
import os
import io
import warnings
from PIL import Image
from stability_sdk import client
import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation

# The host URL must not be prefixed with "https" and must not end with a slash.
os.environ['STABILITY_HOST'] = 'grpc.stability.ai:443'

# Sign up for an account at the following link to get an API Key.
# https://platform.stability.ai/

# Once you have an account, your API Key is listed here:
# https://platform.stability.ai/account/keys

# The key is deliberately NOT written into the notebook, so it cannot leak when
# the notebook is shared. Provide it through the STABILITY_KEY environment
# variable; if that is not set you are prompted for it (input is hidden).
if not os.environ.get('STABILITY_KEY'):
    from getpass import getpass
    os.environ['STABILITY_KEY'] = getpass("Stability AI API key: ")

In [None]:
# Open the connection to the Stability API.
# Available engines: https://platform.stability.ai/docs/features/api-parameters#engine
stability_api = client.StabilityInference(
    engine="stable-diffusion-xl-1024-v1-0",  # model used for generation
    key=os.environ['STABILITY_KEY'],  # API key taken from the environment
    verbose=True,  # print debug messages
)

In [None]:
import os
import io
import warnings
from PIL import Image
import matplotlib.pyplot as plt

def clear_output_directory(directory):
    """
    Delete every regular file directly inside `directory`.

    Sub-directories are left in place, and a directory that does not exist is
    silently ignored. A file that cannot be deleted is reported with print()
    and does not stop the clean-up.
    """
    if not os.path.exists(directory):
        return

    for entry_name in os.listdir(directory):
        target = os.path.join(directory, entry_name)
        try:
            # Only plain files are removed; anything else is skipped
            if not os.path.isfile(target):
                continue
            os.unlink(target)
        except Exception as e:
            print(f"Failed to delete {target}. Reason: {e}")

def resize_image(image_path, output_path, max_size=1024):
    """
    Copy an image to output_path, shrinking it if its longer side exceeds max_size.

    The aspect ratio is kept. Images that are already small enough are saved
    unchanged in size.

    Parameters:
    - image_path: String, path of the image to read.
    - output_path: String, path the (possibly resized) image is written to.
    - max_size: Int, maximum allowed length of the longer side in pixels.
    """
    # The context manager closes the source file once the copy has been saved
    with Image.open(image_path) as image:
        width, height = image.size

        # Scale relative to the longer side
        scaling_factor = max_size / max(width, height)

        if scaling_factor < 1:
            # Never let a side collapse to 0 pixels for extreme aspect ratios
            new_width = max(1, int(width * scaling_factor))
            new_height = max(1, int(height * scaling_factor))
            result = image.resize((new_width, new_height))
            message = f"Image resized to {new_width}x{new_height} and saved as {output_path}"
        else:
            result = image
            message = f"Image is already within the size limits and saved as {output_path}"

        # JPEG cannot store alpha or palette data; convert such images instead of failing
        if str(output_path).lower().endswith(('.jpg', '.jpeg')) and result.mode not in ('1', 'L', 'RGB', 'CMYK'):
            result = result.convert('RGB')

        result.save(output_path)

    print(message)

def generate_image_from_masked(input_image_path, output_image_path):
    """
    Generate a new image from a masked image using an image-to-image model.

    The input is first copied (and downsized if necessary) to a temporary JPEG,
    which is then sent to the module-level `stability_api` client as the
    initial image.

    Parameters:
    - input_image_path: String, path of the masked frame to transform.
    - output_image_path: String, path the generated image is saved to.

    Nothing is written to output_image_path when the response contains no
    image artifact (for example when the safety filter is triggered).
    """
    # Open and possibly resize the image
    resized_image_path = '/kaggle/working/temp_resized_image.jpg'
    resize_image(input_image_path, resized_image_path)

    # Open the resized image
    img = Image.open(resized_image_path)

    # Generate at the same size as the (resized) input
    width, height = img.size

    # Set up our initial generation parameters.
    answers = stability_api.generate(
        prompt="bottle with glowing effect holding magical potion, alphonse mucha and simon stalenhag style",
        seed = 69696969,
        init_image=img,  # Assign our previously generated img as our Initial Image for transformation.
        start_schedule=0.6,  # Set the strength of our prompt in relation to our initial image.
        steps=30,  # Amount of inference steps performed on image generation. Defaults to 30.
        cfg_scale=10.0,  # Influences how strongly your generation is guided to match your prompt.
        width=width,  # Generation width
        height=height,  # Generation height
        sampler=generation.SAMPLER_DDIM,  # Sampler type
        style_preset="comic-book"  # Style preset
    )

    # Process the response and save the image
    for resp in answers:
        for artifact in resp.artifacts:
            if artifact.finish_reason == generation.FILTER:
                # The two literals are concatenated, so the first needs a trailing space
                warnings.warn(
                    "Your request activated the API's safety filters and could not be processed. "
                    "Please modify the prompt and try again.")
            if artifact.type == generation.ARTIFACT_IMAGE:
                img2 = Image.open(io.BytesIO(artifact.binary))
                img2.save(output_image_path)
                print(f"Generated image saved as {output_image_path}")

# Directory paths
masked_images_dir = '/kaggle/working/restored_frames'
output_gen_dir = '/kaggle/working/mask_gen'
os.makedirs(output_gen_dir, exist_ok=True)

# Clear the output directory
clear_output_directory(output_gen_dir)

# Only frame images are sent to the API, in frame order, so no request is
# spent on stray files in the directory
masked_image_names = [f for f in os.listdir(masked_images_dir) if f.startswith("frame_") and f.endswith(".png")]
masked_image_names.sort(key=lambda f: int(f.split('_')[-1].split('.')[0]))

# Iterate over each masked image and apply image-to-image generation
for masked_image_name in masked_image_names:
    masked_image_path = os.path.join(masked_images_dir, masked_image_name)
    output_image_path = os.path.join(output_gen_dir, f"gen_{masked_image_name}")

    # Generate new image from the masked image
    generate_image_from_masked(masked_image_path, output_image_path)

    # No file is written when the response carried no image (e.g. safety filter)
    if not os.path.exists(output_image_path):
        print(f"Warning: No image was generated for {masked_image_name}. Skipping display.")
        continue

    # Optional: Display the generated image
    out_img = Image.open(output_image_path)
    plt.imshow(out_img)
    plt.title(f"Generated from {masked_image_name}")
    plt.show()


In [None]:
# Turn the generated object frames produced above into a video
frames_folder = '/kaggle/working/mask_gen'  # directory that holds the frames
output_video_path = "/kaggle/working/mask_gen_output_video.mp4"  # video file to create

frames_to_video(frames_folder=frames_folder, output_video_path=output_video_path, fps=30)

In [None]:
from PIL import Image
import numpy as np
import os
import matplotlib.pyplot as plt

def combine_masked_regions(masked_image_path, inverse_masked_image_path, save_path):
    """
    Paste the bright pixels of the masked image onto the inverse-masked image.

    A pixel of the masked image is copied when at least one of its colour
    channels is greater than 30; all other pixels come from the inverse-masked image.

    Parameters:
    - masked_image_path: String, path to the masked image.
    - inverse_masked_image_path: String, path to the inverse-masked image.
    - save_path: String, path where the combined image will be saved.
    """
    # Load both frames as RGBA; the inverse frame is resized to the masked frame if needed
    foreground = Image.open(masked_image_path).convert("RGBA")
    background = Image.open(inverse_masked_image_path).convert("RGBA")
    if foreground.size != background.size:
        background = background.resize(foreground.size)

    fg_pixels = np.array(foreground)

    # Select pixels whose colour exceeds the darkness threshold in any channel
    mask = (fg_pixels[..., :3] > 30).any(axis=-1)

    # Overwrite the selected pixels of the background frame
    result = np.array(background)
    result[mask] = fg_pixels[mask]

    Image.fromarray(result).save(save_path)
    print(f"Combined image saved as {save_path}")

#     # Display the combined image
#     plt.imshow(combined_image)
#     plt.axis('off')
#     plt.show()

# Generated object frames, background-only frames, and the destination for merged frames
masked_images_dir = "/kaggle/working/mask_gen"
inverse_images_dir = "/kaggle/working/inverse_restored_frames"
output_dir = "/kaggle/working/Generative_combined_images"

# Create the output directory on first run
os.makedirs(output_dir, exist_ok=True)

# Everything found in the generated-frames directory, in name order
masked_images = sorted(os.listdir(masked_images_dir))

# Merge each generated frame with the background frame of the same index
for masked_image_name in masked_images:
    # Ignore anything that is not a generated frame
    if not (masked_image_name.startswith("gen_frame_") and masked_image_name.endswith(".png")):
        continue

    # The text between the prefix and the extension is the frame index
    index = masked_image_name[len("gen_frame_"):-len(".png")]

    # The background frame carries the same index without the "gen_" prefix
    inverse_image_name = f"frame_{index}.png"

    masked_image_path = os.path.join(masked_images_dir, masked_image_name)
    inverse_image_path = os.path.join(inverse_images_dir, inverse_image_name)
    save_path = os.path.join(output_dir, f"combined_frame_{index}.png")

    # Both inputs must be present before they can be merged
    if not (os.path.exists(masked_image_path) and os.path.exists(inverse_image_path)):
        print(f"Warning: Missing files for frame {index}. Skipping combination.")
        continue

    combine_masked_regions(masked_image_path, inverse_image_path, save_path)


### frames to video 

In [None]:
# Turn the merged generative frames produced above into a video
frames_folder = '/kaggle/working/Generative_combined_images'  # directory that holds the frames
output_video_path = "/kaggle/working/Generative_combined_output_video.mp4"  # video file to create

frames_to_video(frames_folder=frames_folder, output_video_path=output_video_path, fps=30)

## generating using multiple APIs

### Using multiple keys gives better image-to-image output; the code below can generate ~50 frames per 25 credits (i.e. per free new sign-up).

In [None]:
import os
import io
import warnings
from PIL import Image
import matplotlib.pyplot as plt
from stability_sdk import client
import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation

# List of API keys.
# Keys are read from the environment instead of being written into the notebook,
# so they cannot leak when the notebook is shared. Set STABILITY_API_KEYS to a
# comma-separated list of keys; if it is not set, the single key in
# STABILITY_KEY is used.
api_keys = [
    key.strip()
    for key in os.environ.get('STABILITY_API_KEYS', os.environ.get('STABILITY_KEY', '')).split(',')
    if key.strip()
]
if not api_keys:
    raise RuntimeError(
        "No Stability AI API keys found. Set the STABILITY_API_KEYS environment "
        "variable to a comma-separated list of keys."
    )

# Directory paths
masked_images_dir = '/kaggle/working/restored_frames'
output_gen_dir = '/kaggle/working/HD_mask_gen'

os.makedirs(output_gen_dir, exist_ok=True)

def initialize_stability_api(api_key):
    """
    Build a Stability inference client for one API key.

    Parameters:
    - api_key: String, the Stability AI API key the client authenticates with.

    Returns the `client.StabilityInference` instance.
    """
    stability_client = client.StabilityInference(
        engine="stable-diffusion-xl-1024-v1-0",  # model used for generation
        key=api_key,  # key this client authenticates with
        verbose=True,  # print debug messages
    )
    return stability_client

def resize_image(image_path, output_path, max_size=1024):
    """
    Copy an image to output_path, shrinking it if its longer side exceeds max_size.

    The aspect ratio is kept. Images that are already small enough are saved
    unchanged in size.

    Parameters:
    - image_path: String, path of the image to read.
    - output_path: String, path the (possibly resized) image is written to.
    - max_size: Int, maximum allowed length of the longer side in pixels.
    """
    # The context manager closes the source file once the copy has been saved
    with Image.open(image_path) as image:
        width, height = image.size

        # Scale relative to the longer side
        scaling_factor = max_size / max(width, height)

        if scaling_factor < 1:
            # Never let a side collapse to 0 pixels for extreme aspect ratios
            new_width = max(1, int(width * scaling_factor))
            new_height = max(1, int(height * scaling_factor))
            result = image.resize((new_width, new_height))
            message = f"Image resized to {new_width}x{new_height} and saved as {output_path}"
        else:
            result = image
            message = f"Image is already within the size limits and saved as {output_path}"

        # JPEG cannot store alpha or palette data; convert such images instead of failing
        if str(output_path).lower().endswith(('.jpg', '.jpeg')) and result.mode not in ('1', 'L', 'RGB', 'CMYK'):
            result = result.convert('RGB')

        result.save(output_path)

    print(message)

def generate_image_from_masked(api, input_image_path, output_image_path, prompt=None):
    """
    Generate a new image from a masked image using an image-to-image model.

    Parameters:
    - api: Client object whose generate(...) method performs the request.
    - input_image_path: String, path to the masked input image.
    - output_image_path: String, path where the generated image is saved.
    - prompt: Optional string, text prompt guiding the generation. When omitted,
      the notebook's default "flaming soccer ball" prompt is used.

    Returns:
    - True if at least one image artifact was received and saved to
      output_image_path, otherwise False (for example when the safety filter
      rejected the request).
    """
    if prompt is None:
        prompt = "soccer ball covered in flames,blazing fireball,eldenring fireball,flames, shiny golden"

    # Write a size-limited copy of the input to a scratch file
    resized_image_path = '/kaggle/working/temp_resized_image.jpg'
    resize_image(input_image_path, resized_image_path)

    image_saved = False

    # Keep the init image open until the whole response has been consumed, then
    # close it deterministically instead of leaking one file handle per frame.
    with Image.open(resized_image_path) as img:
        # Generate at the same dimensions as the (resized) input
        width, height = img.size

        # Set up our initial generation parameters.
        answers = api.generate(
            prompt=prompt,
            init_image=img,  # Assign our previously generated img as our Initial Image for transformation.
            seed=69696969,
            start_schedule=0.6,  # Set the strength of our prompt in relation to our initial image.
            steps=65,  # Amount of inference steps performed on image generation. Defaults to 30.
            cfg_scale=10.0,  # Influences how strongly your generation is guided to match your prompt.
            width=width,  # Generation width
            height=height,  # Generation height
            sampler=generation.SAMPLER_K_DPMPP_SDE,  # Sampler type
            style_preset="fantasy-art"  # Style preset
        )

        # Process the response and save the image
        for resp in answers:
            for artifact in resp.artifacts:
                if artifact.finish_reason == generation.FILTER:
                    warnings.warn(
                        "Your request activated the API's safety filters and could not be processed. "
                        "Please modify the prompt and try again.")
                if artifact.type == generation.ARTIFACT_IMAGE:
                    with Image.open(io.BytesIO(artifact.binary)) as img2:
                        img2.save(output_image_path)
                    print(f"Generated image saved as {output_image_path}")
                    image_saved = True

    return image_saved

# Initialize the first Stability API client
stability_api = initialize_stability_api(api_keys[0])

# Number of frames generated with one key before rotating to the next one
frames_per_key = 50

# Iterate over each masked image and apply image-to-image generation.
# The listing is sorted so frames are processed in a deterministic, name-based
# order (os.listdir returns entries in arbitrary order).
for i, masked_image_name in enumerate(sorted(os.listdir(masked_images_dir))):
    # Change API key every `frames_per_key` frames, wrapping around the key list
    if i > 0 and i % frames_per_key == 0:
        api_index = (i // frames_per_key) % len(api_keys)  # Calculate the API key index
        stability_api = initialize_stability_api(api_keys[api_index])

    masked_image_path = os.path.join(masked_images_dir, masked_image_name)
    output_image_path = os.path.join(output_gen_dir, f"gen_{masked_image_name}")

    # Generate new image from the masked image
    generate_image_from_masked(stability_api, masked_image_path, output_image_path)

    # The API may return no image (e.g. when the safety filter triggers); skip
    # the preview instead of crashing the whole run on a missing file.
    if not os.path.exists(output_image_path):
        print(f"No image was generated for {masked_image_name}; skipping preview.")
        continue

    # Optional: Display the generated image
    with Image.open(output_image_path) as out_img:
        plt.imshow(out_img)
        plt.title(f"Generated from {masked_image_name}")
        plt.show()


In [None]:
# Build a video from the generated frames.
# NOTE(review): frames_to_video is not defined in this cell; it is expected to
# come from an earlier cell, which must have been run before this one.
frames_folder = '/kaggle/working/HD_mask_gen'  # Folder holding the frames written by the generation step
output_video_path = "/kaggle/working/HD_mask_gen_output_video.mp4"  # Desired output video file path

# fps=30 is passed by keyword; the first two arguments are positional.
frames_to_video(frames_folder, output_video_path, fps=30)

In [None]:
from PIL import Image
import numpy as np
import os
import matplotlib.pyplot as plt

def combine_masked_regions(masked_image_path, inverse_masked_image_path, save_path, threshold=30):
    """
    Overlay the bright pixels of the masked image onto the inverse-masked image.

    A pixel of the masked image is copied onto the result when any of its
    R, G or B values is strictly greater than `threshold`; every other pixel is
    taken from the inverse-masked image. The result is saved to save_path.

    Parameters:
    - masked_image_path: String, path to the masked image.
    - inverse_masked_image_path: String, path to the inverse-masked image.
    - save_path: String, path where the combined image will be saved.
    - threshold: Int, per-channel cutoff separating "dark" background pixels
      from pixels that belong to the masked region. Defaults to 30.
    """
    # Convert inside `with` blocks: convert() yields an independent in-memory
    # image, so the source files can be closed immediately instead of leaking.
    with Image.open(masked_image_path) as masked_file:
        masked_image = masked_file.convert("RGBA")
    with Image.open(inverse_masked_image_path) as inverse_file:
        inverse_masked_image = inverse_file.convert("RGBA")

    # Ensure images are the same size by resizing the inverse image
    if masked_image.size != inverse_masked_image.size:
        inverse_masked_image = inverse_masked_image.resize(masked_image.size)

    # Convert images to numpy arrays
    masked_array = np.array(masked_image)
    inverse_masked_array = np.array(inverse_masked_image)

    # True where any colour channel (alpha excluded) exceeds the threshold
    mask = np.any(masked_array[..., :3] > threshold, axis=-1)

    # Start from the inverse-masked pixels and overwrite the selected ones
    combined_array = inverse_masked_array.copy()
    combined_array[mask] = masked_array[mask]

    # Convert back to an image and save it
    combined_image = Image.fromarray(combined_array)
    combined_image.save(save_path)
    print(f"Combined image saved as {save_path}")

#     # Display the combined image
#     plt.imshow(combined_image)
#     plt.axis('off')
#     plt.show()

# Input and output locations for the recombination step
masked_images_dir = "/kaggle/working/HD_mask_gen"
inverse_images_dir = "/kaggle/working/inverse_restored_frames"
output_dir = "/kaggle/working/HD_Generative_combined_images"

# Create the destination folder up front (no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

# Name-ordered listing of everything in the generated-frames folder
masked_images = os.listdir(masked_images_dir)
masked_images.sort()

for masked_image_name in masked_images:
    # Only files named gen_frame_<index>.png take part in the combination
    if not (masked_image_name.startswith("gen_frame_") and masked_image_name.endswith(".png")):
        continue

    # <index> is whatever sits between the prefix and the extension
    index = masked_image_name[len("gen_frame_"):-len(".png")]

    # The matching inverse frame shares the same index
    inverse_image_name = "frame_" + index + ".png"

    masked_image_path = os.path.join(masked_images_dir, masked_image_name)
    inverse_image_path = os.path.join(inverse_images_dir, inverse_image_name)
    save_path = os.path.join(output_dir, "combined_frame_" + index + ".png")

    # Both inputs must be present; otherwise report the gap and move on
    if not (os.path.exists(masked_image_path) and os.path.exists(inverse_image_path)):
        print(f"Warning: Missing files for frame {index}. Skipping combination.")
        continue

    combine_masked_regions(masked_image_path, inverse_image_path, save_path)


In [None]:
# Build a video from the recombined frames.
# NOTE(review): frames_to_video is not defined in this cell; it is expected to
# come from an earlier cell, which must have been run before this one.
frames_folder = '/kaggle/working/HD_Generative_combined_images'  # Folder holding the combined frames
output_video_path = "/kaggle/working/HD_Generative_combined_output_video.mp4"  # Desired output video file path

# fps=30 is passed by keyword; the first two arguments are positional.
frames_to_video(frames_folder, output_video_path, fps=30)

## Side-by-side video of the original and the Img2Img-generated video

In [None]:
from PIL import Image
import os
import subprocess
import shutil

# Directories for the input frames and output combined frames
dir1 = '/kaggle/working/output_frames'  # Left-hand panel frames (.jpg)
dir2 = '/kaggle/working/HD_Generative_combined_images'  # Right-hand panel frames (.png)
output_dir = '/kaggle/working/genai_with_replacement_combined_frames'
video_output = '/kaggle/working/genai_with_replacement_output_video.mp4'

# Ensure the output directory exists and is empty
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)  # Remove the directory and its contents
os.makedirs(output_dir)  # Recreate the empty directory

# Remove the previous video if it exists
if os.path.exists(video_output):
    os.remove(video_output)

# Get sorted lists of the frames
frames1 = sorted(f for f in os.listdir(dir1) if f.endswith('.jpg'))
frames2 = sorted(f for f in os.listdir(dir2) if f.endswith('.png'))

# zip() below stops at the shorter list, so make a count mismatch visible
# instead of silently dropping the extra frames.
if len(frames1) != len(frames2):
    print(
        f"Warning: {len(frames1)} frames in {dir1} but {len(frames2)} in {dir2}; "
        f"only the first {min(len(frames1), len(frames2))} pairs will be combined."
    )

# Iterate over both directories and combine images (paired by sorted position)
for idx, (f1, f2) in enumerate(zip(frames1, frames2), start=1):
    # The context managers close both source files after each pair is written
    with Image.open(os.path.join(dir1, f1)) as img1, Image.open(os.path.join(dir2, f2)) as img2:
        # Resize the taller image to match the height of the shorter one while maintaining the aspect ratio
        if img1.height > img2.height:
            img1 = img1.resize((int(img1.width * (img2.height / img1.height)), img2.height), Image.LANCZOS)
        elif img2.height > img1.height:
            img2 = img2.resize((int(img2.width * (img1.height / img2.height)), img1.height), Image.LANCZOS)

        # Combine images side by side
        combined_img = Image.new('RGB', (img1.width + img2.width, img1.height))
        combined_img.paste(img1, (0, 0))
        combined_img.paste(img2, (img1.width, 0))

        # Save combined image with a sequential name like combined_frame_001.png
        combined_img.save(os.path.join(output_dir, f"combined_frame_{idx:03d}.png"))

print(f"Frames combined and saved in {output_dir}")

# List the files in the output directory to verify they exist
print("Files in output directory:", os.listdir(output_dir))

# Convert the combined frames into a video using ffmpeg.
# libx264 with yuv420p requires even frame dimensions, and the side-by-side
# width can be odd, so the pad filter rounds both dimensions up to even.
ffmpeg_result = subprocess.run([
    'ffmpeg', '-framerate', '30', '-i',
    f'{output_dir}/combined_frame_%03d.png',
    '-vf', 'pad=ceil(iw/2)*2:ceil(ih/2)*2',
    '-c:v', 'libx264', '-pix_fmt', 'yuv420p', video_output
])

# Only report success when ffmpeg actually exited cleanly
if ffmpeg_result.returncode == 0:
    print(f"Video saved as {video_output}")
else:
    print(f"ffmpeg exited with code {ffmpeg_result.returncode}; {video_output} was not created successfully.")


# Thank you!!!