# Hugging Face Space: nakas/360_metadata_image_injector (status: Running)
# File size: 10,453 bytes

import gradio as gr
import cv2
import numpy as np
import os
import gc
from tqdm import tqdm
import logging
from PIL import Image
from datetime import datetime
import struct

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def create_xmp_block(width, height):
    """Build the GPano XMP packet that marks an image as a 360° panorama.

    The packet declares an equirectangular projection whose full-pano and
    cropped-area dimensions are both ``width`` x ``height`` with a zero crop
    offset, i.e. an uncropped panorama.

    Args:
        width: Panorama width in pixels.
        height: Panorama height in pixels.

    Returns:
        The XMP packet as a ``str``, from the opening ``<?xpacket begin=...?>``
        processing instruction through the closing ``<?xpacket end="w"?>``.
    """
    # The xpacket "begin" attribute is meant to carry U+FEFF.  The previous
    # literal was the mojibake "ï»¿" (the BOM's UTF-8 bytes mis-decoded as
    # Latin-1), which encoded to six junk bytes instead of EF BB BF.  The
    # escape sequence keeps the character intact across editors and pastes.
    xmp = (
        '<?xpacket begin="\ufeff" id="W5M0MpCehiHzreSzNTczkc9d"?>\n'
        '<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="ExifTool">\n'
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">\n'
        '<rdf:Description rdf:about=""\n'
        'xmlns:GPano="http://ns.google.com/photos/1.0/panorama/"\n'
        'GPano:ProjectionType="equirectangular"\n'
        'GPano:UsePanoramaViewer="True"\n'
        f'GPano:FullPanoWidthPixels="{width}"\n'
        f'GPano:FullPanoHeightPixels="{height}"\n'
        f'GPano:CroppedAreaImageWidthPixels="{width}"\n'
        f'GPano:CroppedAreaImageHeightPixels="{height}"\n'
        'GPano:CroppedAreaLeftPixels="0"\n'
        'GPano:CroppedAreaTopPixels="0"/>\n'
        '</rdf:RDF>\n'
        '</x:xmpmeta>\n'
        '<?xpacket end="w"?>'
    )
    return xmp

def _xmp_insert_offset(data):
    """Return the offset just past the leading JFIF/Exif segments of a JPEG.

    ``data`` must start with the two-byte SOI marker.  Scanning stops at the
    first segment that is neither APP0 nor an Exif APP1, or at the first
    segment whose declared length is malformed or runs past the end of data.
    """
    pos = 2  # skip SOI
    while pos + 4 <= len(data) and data[pos] == 0xFF:
        marker = data[pos + 1]
        seg_len = struct.unpack_from('>H', data, pos + 2)[0]
        seg_end = pos + 2 + seg_len
        # The length field counts itself, so anything below 2 is invalid.
        if seg_len < 2 or seg_end > len(data):
            break
        is_jfif = marker == 0xE0
        is_exif = marker == 0xE1 and data[pos + 4:seg_end].startswith(b'Exif\x00\x00')
        if not (is_jfif or is_exif):
            break
        pos = seg_end
    return pos


def write_xmp_to_jpg(input_path, output_path, width, height):
    """Insert a GPano XMP APP1 segment into a JPEG file.

    The whole input file is read into memory before the output is opened, so
    ``input_path`` and ``output_path`` may refer to the same file.

    The XMP segment is placed after any leading JFIF (APP0) and Exif (APP1)
    segments instead of directly after SOI, so those headers keep their
    position at the start of the file.

    Args:
        input_path: Path of the JPEG to read.
        output_path: Path the tagged JPEG is written to.
        width: Panorama width in pixels, forwarded to ``create_xmp_block``.
        height: Panorama height in pixels, forwarded to ``create_xmp_block``.

    Raises:
        ValueError: If the input does not start with the JPEG SOI marker, or
            the XMP packet does not fit in a single APP1 segment.
    """
    with open(input_path, 'rb') as f:
        data = f.read()

    if data[0:2] != b'\xFF\xD8':
        raise ValueError("Not a valid JPEG file")

    # APP1 payload: XMP namespace identifier followed by the UTF-8 packet.
    xmp_header = b'http://ns.adobe.com/xap/1.0/\x00'
    xmp_bytes = create_xmp_block(width, height).encode('utf-8')

    # The 16-bit segment length includes its own two bytes.
    length = len(xmp_header) + len(xmp_bytes) + 2
    if length > 0xFFFF:
        raise ValueError("XMP packet too large for a single APP1 segment")
    segment = b'\xFF\xE1' + struct.pack('>H', length) + xmp_header + xmp_bytes

    insert_at = _xmp_insert_offset(data)

    with open(output_path, 'wb') as f:
        f.write(data[:insert_at] + segment + data[insert_at:])

def preprocess_frame(frame):
    """Scale a BGR frame to 1080 px height and equalise its local contrast.

    The frame is resized to a height of 1080 with the width derived from the
    source aspect ratio, converted to LAB, CLAHE is applied to the L channel
    only, and the result is converted back to BGR.
    """
    src_height, src_width = frame.shape[0], frame.shape[1]
    out_height = 1080
    out_width = int(out_height * (src_width / src_height))
    resized = cv2.resize(frame, (out_width, out_height))

    # Equalise lightness only so the colour channels are left untouched.
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(resized, cv2.COLOR_BGR2LAB))
    equaliser = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    lab_equalised = cv2.merge((equaliser.apply(lightness), chan_a, chan_b))

    return cv2.cvtColor(lab_equalised, cv2.COLOR_LAB2BGR)

def extract_frames(video_path, num_frames=24):
    """Sample evenly spaced frames from a video and preprocess each one.

    Args:
        video_path: Path of the video file to open with OpenCV.
        num_frames: Number of sample positions spread from the first to the
            last frame.  Fewer frames are returned when the video is shorter
            than this or when individual reads fail.

    Returns:
        A list of frames, each already passed through ``preprocess_frame``.

    Raises:
        Exception: With the prefix ``"Frame extraction failed: "`` if the
            video cannot be opened, reports no frames, or any step fails.
            The original error is attached as ``__cause__``.
    """
    cap = None
    try:
        logger.info(f"Opening video: {video_path}")
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise Exception("Could not open video file")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # A zero or negative count would otherwise yield negative seek indices.
        if total_frames <= 0:
            raise Exception("Video reports no frames")

        # linspace repeats indices when the video has fewer frames than
        # num_frames; np.unique drops the repeats so no frame is read twice.
        frame_indices = np.unique(
            np.linspace(0, total_frames - 1, num_frames, dtype=int)
        )
        frames = []

        for idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if ret:
                frames.append(preprocess_frame(frame))
            gc.collect()

        logger.info(f"Extracted {len(frames)} frames")
        return frames

    except Exception as e:
        raise Exception(f"Frame extraction failed: {str(e)}") from e
    finally:
        # Release the capture on every path, success or failure.
        if cap is not None:
            cap.release()

def create_360_panorama(frames):
    """Stitch frames into a 2160x1080 equirectangular image with padded poles.

    The stitched panorama is resized into a horizontal band centred in a black
    2:1 canvas.  The band covers ``vertical_fov / 180`` of the canvas height;
    the rows next to the band are filled with a fade of the band's first and
    last rows so the transition to black is not a hard edge.

    Args:
        frames: Sequence of at least two images accepted by ``cv2.Stitcher``.

    Returns:
        A ``uint8`` array of shape ``(1080, 2160, 3)``.

    Raises:
        Exception: With the prefix ``"360° panorama creation failed: "`` if
            fewer than two frames are given, stitching fails, or any other
            step raises.  The original error is attached as ``__cause__``.
    """
    try:
        if len(frames) < 2:
            raise Exception("Need at least 2 frames")

        # iPhone wide angle is typically around 120 degrees vertical FOV
        # We'll adjust the output size to account for this
        vertical_fov = 120  # degrees
        total_vertical_fov = 180  # full equirectangular height

        # Fraction of the canvas height left empty above (and below) the band.
        padding_ratio = (total_vertical_fov - vertical_fov) / (2 * total_vertical_fov)

        # Create stitcher with custom settings
        stitcher = cv2.Stitcher.create(cv2.Stitcher_PANORAMA)
        stitcher.setPanoConfidenceThresh(0.8)

        logger.info("Starting panorama stitching...")
        status, panorama = stitcher.stitch(frames)

        if status != cv2.Stitcher_OK:
            raise Exception(f"Stitching failed with status {status}")

        # Calculate target dimensions
        target_height = 1080
        target_width = target_height * 2  # 2:1 aspect ratio for equirectangular

        # Derive the band height from the integer padding so the resized
        # panorama always matches the destination slice exactly, instead of
        # relying on two separate float truncations agreeing.
        pad_pixels = int(target_height * padding_ratio)
        band_height = target_height - 2 * pad_pixels

        # Resize stitched panorama
        panorama = cv2.resize(panorama, (target_width, band_height))

        # Create final image with padding
        final_panorama = np.zeros((target_height, target_width, 3), dtype=np.uint8)

        # Place the panorama in the middle
        final_panorama[pad_pixels:pad_pixels + band_height, :] = panorama

        # Apply slight feathering at the edges to avoid hard transitions
        feather_size = int(pad_pixels * 0.3)
        for i in range(feather_size):
            alpha = i / feather_size
            # Feather top: fade in from black towards the band's first row.
            final_panorama[pad_pixels-feather_size+i, :] = \
                (panorama[0, :] * alpha).astype(np.uint8)
            # Feather bottom: fade out from the band's last row to black.
            final_panorama[target_height-pad_pixels+i, :] = \
                (panorama[-1, :] * (1-alpha)).astype(np.uint8)

        logger.info(f"Created panorama of size {final_panorama.shape} with vertical FOV adjustment")
        return final_panorama

    except Exception as e:
        raise Exception(f"360° panorama creation failed: {str(e)}") from e

def equirect_to_cubemap(equirect):
    """Convert an equirectangular image to a six-face cubemap sheet.

    Each face is ``equirect.shape[0] // 2`` pixels square.  The output canvas
    is 3 faces tall by 4 faces wide; faces are written left to right in the
    order front, right, back, left on the first row and top, bottom at the
    start of the second row.  Canvas cells not covered by a face, and face
    pixels whose source coordinate falls outside the image, stay black.

    Sampling is nearest-pixel by truncation.  The mapping for a whole face is
    computed with NumPy array operations rather than a per-pixel Python loop.

    Args:
        equirect: Source image array indexed ``[row, column]``.

    Returns:
        A ``uint8`` array of shape ``(face_size * 3, face_size * 4, 3)``.
    """
    src_height, src_width = equirect.shape[:2]
    face_size = src_height // 2
    cubemap = np.zeros((face_size * 3, face_size * 4, 3), dtype=np.uint8)

    rotations = [
        (0, 0, 0),    # front
        (0, 90, 0),   # right
        (0, 180, 0),  # back
        (0, 270, 0),  # left
        (-90, 0, 0),  # top
        (90, 0, 0)    # bottom
    ]

    # Face-plane coordinates in [-1, 1), shared by every face.
    # grid_x varies along columns (u), grid_y along rows (v).
    coords = 2 * np.arange(face_size) / face_size - 1
    grid_x, grid_y = np.meshgrid(coords, coords)

    for i, rotation in enumerate(rotations):
        x = (i % 4) * face_size
        y = (i // 4) * face_size

        R = cv2.Rodrigues(np.array([rotation[0] * np.pi / 180,
                                   rotation[1] * np.pi / 180,
                                   rotation[2] * np.pi / 180]))[0]

        # Rotate the direction (grid_x, grid_y, 1) of every face pixel at once.
        px = R[0, 0] * grid_x + R[0, 1] * grid_y + R[0, 2]
        py = R[1, 0] * grid_x + R[1, 1] * grid_y + R[1, 2]
        pz = R[2, 0] * grid_x + R[2, 1] * grid_y + R[2, 2]

        # Direction -> longitude (theta) and latitude (phi).
        theta = np.arctan2(px, pz)
        phi = np.arctan2(py, np.sqrt(px**2 + pz**2))

        # Both operands are non-negative, so truncation matches int().
        u_equi = ((theta + np.pi) * src_width / (2 * np.pi)).astype(int)
        v_equi = ((phi + np.pi/2) * src_height / np.pi).astype(int)

        inside = (
            (u_equi >= 0) & (u_equi < src_width)
            & (v_equi >= 0) & (v_equi < src_height)
        )

        samples = equirect[v_equi[inside], u_equi[inside]]
        if samples.ndim == 1:
            # Single-channel source: spread each value across the 3 channels.
            samples = samples[:, None]

        # Basic slicing yields a view, so this writes into cubemap itself.
        face = cubemap[y:y + face_size, x:x + face_size]
        face[inside] = samples

    return cubemap

def process_video(video):
    """Run the full video-to-panorama pipeline for the Gradio interface.

    Steps: extract frames, stitch them into an equirectangular image, derive
    a cubemap, save both as timestamped JPEGs in the working directory, and
    inject GPano XMP metadata into the equirectangular file.

    Args:
        video: Path of the uploaded video, or ``None`` if nothing was uploaded.

    Returns:
        A tuple ``(equirect_path, cubemap_path, status_message)``.  On any
        failure both paths are ``None`` and the message describes the error;
        exceptions are not propagated to the caller.
    """
    try:
        if video is None:
            return None, None, "Please upload a video file."

        video_path = video
        if not os.path.exists(video_path):
            return None, None, "Error: Video file not found."

        # Log the working directory and file permission
        logger.info(f"Working directory: {os.getcwd()}")
        logger.info(f"Video path exists: {os.path.exists(video_path)}")
        logger.info(f"Video path permissions: {oct(os.stat(video_path).st_mode)[-3:]}")

        # Extract frames
        frames = extract_frames(video_path, num_frames=24)
        if not frames:
            return None, None, "Error: No frames could be extracted from the video."

        # Create panorama
        equirect = create_360_panorama(frames)
        logger.info("Created equirectangular panorama")

        # Create cubemap
        cubemap = equirect_to_cubemap(equirect)
        logger.info("Created cubemap")

        # Save paths
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        equirect_path = f"360_photo_{timestamp}.jpg"
        cubemap_path = f"cubemap_{timestamp}.jpg"

        # Save equirectangular image.  cv2.imwrite reports failure through
        # its return value rather than an exception, so check it explicitly.
        logger.info("Saving equirectangular image...")
        if not cv2.imwrite(equirect_path, equirect):
            raise Exception(f"Could not write {equirect_path}")

        # Add metadata to equirectangular image (rewritten in place)
        height, width = equirect.shape[:2]
        write_xmp_to_jpg(equirect_path, equirect_path, width, height)
        logger.info("Added 360 metadata to equirectangular image")

        # Save cubemap
        logger.info("Saving cubemap...")
        if not cv2.imwrite(cubemap_path, cubemap):
            raise Exception(f"Could not write {cubemap_path}")

        return equirect_path, cubemap_path, "Processing completed successfully!"

    except Exception as e:
        # Top-level UI boundary: log with traceback, report as status text.
        logger.exception(f"Error in process_video: {str(e)}")
        return None, None, f"Error during processing: {str(e)}"

# Gradio front end: one video input, and two image outputs plus a status
# line that line up with the three values returned by process_video.
iface = gr.Interface(
    title="360° Video to Photo Converter",
    description="""
    Upload a 360° panoramic video (shot with iPhone wide-angle lens) to convert it into:
    1. 360° Photo with proper metadata (can be viewed in Google Photos, Facebook, etc.)
    2. Cubemap view
    
    Tips for best results:
    - Keep video length under 30 seconds
    - Ensure steady camera motion
    - Video should complete a full 360° rotation
    - Maintain consistent camera height
    - Good lighting conditions help with stitching
    """,
    fn=process_video,
    inputs=gr.Video(label="Upload 360° Video"),
    outputs=[
        gr.Image(label="360° Photo (with metadata)"),
        gr.Image(label="Cubemap View"),
        gr.Textbox(label="Status"),
    ],
    flagging_mode="never",
)

# Serve the queued app on all interfaces, port 7860, only when run as a script.
if __name__ == "__main__":
    queued_app = iface.queue()
    queued_app.launch(server_port=7860, server_name="0.0.0.0")