import gradio as gr
import cv2
import numpy as np
import os
import gc
from tqdm import tqdm
import logging
from PIL import Image
from datetime import datetime
import struct

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def create_xmp_block(width, height):
    """Create an XMP metadata packet following ExifTool's exact format.

    The packet carries the Google GPano properties that mark a JPEG as an
    equirectangular 360° photo so viewers (Google Photos, Facebook, ...)
    render it as a sphere.

    Args:
        width: Full panorama width in pixels.
        height: Full panorama height in pixels.

    Returns:
        The XMP packet as a str (UTF-8 encodable).
    """
    # NOTE(review): the original body had its XML content stripped (only
    # bare '\n' literals remained), producing an empty packet. This is the
    # standard GPano packet layout as written by ExifTool.
    xmp = (
        '<?xpacket begin="\ufeff" id="W5M0MpCehiHzreSzNTczkc9d"?>\n'
        '<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="XMP Core 5.1.2">\n'
        ' <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">\n'
        '  <rdf:Description rdf:about=""\n'
        '    xmlns:GPano="http://ns.google.com/photos/1.0/panorama/"\n'
        '   GPano:ProjectionType="equirectangular"\n'
        f'   GPano:FullPanoWidthPixels="{width}"\n'
        f'   GPano:FullPanoHeightPixels="{height}"\n'
        f'   GPano:CroppedAreaImageWidthPixels="{width}"\n'
        f'   GPano:CroppedAreaImageHeightPixels="{height}"\n'
        '   GPano:CroppedAreaLeftPixels="0"\n'
        '   GPano:CroppedAreaTopPixels="0"/>\n'
        ' </rdf:RDF>\n'
        '</x:xmpmeta>\n'
        '<?xpacket end="w"?>'
    )
    return xmp


def write_xmp_to_jpg(input_path, output_path, width, height):
    """Write XMP metadata into a JPEG file (ExifTool-style APP1 segment).

    Reads the whole source file first, so input_path == output_path
    (in-place update) is safe.

    Args:
        input_path: Path of the JPEG to read.
        output_path: Path to write the tagged JPEG to.
        width: Panorama width, embedded in the GPano tags.
        height: Panorama height, embedded in the GPano tags.

    Raises:
        ValueError: If the input is not a JPEG or the XMP packet would
            overflow a single APP1 segment.
    """
    # Read the original JPEG
    with open(input_path, 'rb') as f:
        data = f.read()

    # Validate the Start-Of-Image marker
    if data[0:2] != b'\xFF\xD8':
        raise ValueError("Not a valid JPEG file")

    # Create XMP data
    xmp_data = create_xmp_block(width, height)

    # APP1 segment: marker, 2-byte length, XMP namespace header, payload
    app1_marker = b'\xFF\xE1'
    xmp_header = b'http://ns.adobe.com/xap/1.0/\x00'
    xmp_bytes = xmp_data.encode('utf-8')

    length = len(xmp_header) + len(xmp_bytes) + 2  # +2 for the length bytes themselves
    if length > 0xFFFF:
        # The segment length is a single big-endian uint16; a larger
        # packet would be silently corrupted by struct.pack.
        raise ValueError("XMP packet too large for one APP1 segment")
    length_bytes = struct.pack('>H', length)

    # Splice the APP1 segment directly after SOI
    output = bytearray()
    output.extend(data[0:2])      # SOI marker
    output.extend(app1_marker)
    output.extend(length_bytes)
    output.extend(xmp_header)
    output.extend(xmp_bytes)
    output.extend(data[2:])       # Rest of the original file

    with open(output_path, 'wb') as f:
        f.write(output)


def preprocess_frame(frame):
    """Resize a frame to 1080p height and boost local contrast.

    CLAHE is applied on the L channel in LAB space so feature detection
    during stitching finds more matches without shifting colors.

    Args:
        frame: BGR image as a numpy array.

    Returns:
        The enhanced BGR frame (height 1080, aspect ratio preserved).
    """
    target_height = 1080
    aspect_ratio = frame.shape[1] / frame.shape[0]
    target_width = int(target_height * aspect_ratio)
    frame = cv2.resize(frame, (target_width, target_height))

    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    cl = clahe.apply(l)
    enhanced = cv2.merge((cl, a, b))
    enhanced = cv2.cvtColor(enhanced, cv2.COLOR_LAB2BGR)
    return enhanced


def extract_frames(video_path, num_frames=24):
    """Extract evenly spaced, preprocessed frames from a video.

    Args:
        video_path: Path to the input video file.
        num_frames: Number of frames to sample across the video.

    Returns:
        List of preprocessed BGR frames (may be shorter than num_frames
        if some reads fail).

    Raises:
        Exception: If the video cannot be opened or extraction fails.
    """
    logger.info(f"Opening video: {video_path}")
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise Exception("Could not open video file")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)

        frames = []
        for idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ret, frame = cap.read()
            if ret:
                frames.append(preprocess_frame(frame))
                gc.collect()  # keep peak memory down on long videos

        logger.info(f"Extracted {len(frames)} frames")
        return frames
    except Exception as e:
        raise Exception(f"Frame extraction failed: {str(e)}")
    finally:
        # try/finally replaces the original `'cap' in locals()` dance;
        # the capture is released on every path.
        cap.release()


def create_360_panorama(frames):
    """Create an equirectangular panorama with wide-angle FOV adjustment.

    Stitches the frames into a panorama, then letterboxes it into a 2:1
    equirectangular canvas, padding the sky/ground that an iPhone
    wide-angle lens (~120° vertical FOV) cannot capture and feathering
    the transition.

    Args:
        frames: List of BGR frames to stitch (at least 2).

    Returns:
        2160x1080 BGR equirectangular image.

    Raises:
        Exception: If stitching fails or fewer than 2 frames are given.
    """
    try:
        if len(frames) < 2:
            raise Exception("Need at least 2 frames")

        # iPhone wide angle is typically around 120 degrees vertical FOV;
        # a full equirectangular image spans 180. Pad the difference.
        vertical_fov = 120       # degrees
        total_vertical_fov = 180  # full equirectangular height
        padding_ratio = (total_vertical_fov - vertical_fov) / (2 * total_vertical_fov)

        # Create stitcher with custom settings
        stitcher = cv2.Stitcher.create(cv2.Stitcher_PANORAMA)
        stitcher.setPanoConfidenceThresh(0.8)

        logger.info("Starting panorama stitching...")
        status, panorama = stitcher.stitch(frames)
        if status != cv2.Stitcher_OK:
            raise Exception(f"Stitching failed with status {status}")

        # Target canvas: 2:1 aspect ratio for equirectangular
        target_height = 1080
        target_width = target_height * 2

        # Resize the stitched strip to fill everything but the padding
        panorama = cv2.resize(
            panorama,
            (target_width, int(target_height * (1 - 2 * padding_ratio))),
        )

        # Black canvas; place the strip vertically centered
        final_panorama = np.zeros((target_height, target_width, 3), dtype=np.uint8)
        pad_pixels = int(target_height * padding_ratio)
        final_panorama[pad_pixels:target_height - pad_pixels, :] = panorama

        # Feather the top/bottom edges to avoid hard transitions into the
        # black padding.
        feather_size = int(pad_pixels * 0.3)
        for i in range(feather_size):
            alpha = i / feather_size
            # Feather top: fade the first panorama row in from black
            final_panorama[pad_pixels - feather_size + i, :] = \
                (panorama[0, :] * alpha).astype(np.uint8)
            # Feather bottom: fade the last panorama row out to black
            final_panorama[target_height - pad_pixels + i, :] = \
                (panorama[-1, :] * (1 - alpha)).astype(np.uint8)

        logger.info(
            f"Created panorama of size {final_panorama.shape} with vertical FOV adjustment"
        )
        return final_panorama
    except Exception as e:
        raise Exception(f"360° panorama creation failed: {str(e)}")


def equirect_to_cubemap(equirect):
    """Convert an equirectangular image to a 4x3 cubemap layout.

    For each of the six faces, rays through the face pixels are rotated
    (axis-angle via cv2.Rodrigues, as in the original) and mapped back to
    equirectangular coordinates with nearest-neighbor sampling. The inner
    per-pixel loops are vectorized with numpy; sampling (truncation,
    bounds check) matches the original scalar code exactly.

    Args:
        equirect: HxWx3 BGR equirectangular image.

    Returns:
        Cubemap image of shape (3*face, 4*face, 3), face = H // 2.
    """
    face_size = equirect.shape[0] // 2
    eq_h, eq_w = equirect.shape[:2]
    cubemap = np.zeros((face_size * 3, face_size * 4, 3), dtype=np.uint8)

    rotations = [
        (0, 0, 0),      # front
        (0, 90, 0),     # right
        (0, 180, 0),    # back
        (0, 270, 0),    # left
        (-90, 0, 0),    # top
        (90, 0, 0),     # bottom
    ]

    # Face-local pixel grid: v indexes rows, u indexes columns (ij order
    # matches the original loop's cubemap[y+v, x+u] write pattern).
    vv, uu = np.meshgrid(np.arange(face_size), np.arange(face_size), indexing='ij')
    x_3d = 2 * uu / face_size - 1
    y_3d = 2 * vv / face_size - 1
    z_3d = np.ones_like(x_3d, dtype=float)
    dirs = np.stack([x_3d, y_3d, z_3d], axis=-1)  # (face, face, 3)

    for i, rotation in enumerate(rotations):
        x = (i % 4) * face_size
        y = (i // 4) * face_size

        R = cv2.Rodrigues(np.array([
            rotation[0] * np.pi / 180,
            rotation[1] * np.pi / 180,
            rotation[2] * np.pi / 180,
        ]))[0]

        # Rotate every face ray at once: R.dot(d) for each pixel
        point = dirs @ R.T
        px, py, pz = point[..., 0], point[..., 1], point[..., 2]

        theta = np.arctan2(px, pz)
        phi = np.arctan2(py, np.sqrt(px ** 2 + pz ** 2))

        # theta ∈ [-pi, pi], phi ∈ [-pi/2, pi/2] → indices are >= 0, so
        # astype(int) truncation equals the original int() behavior.
        u_equi = ((theta + np.pi) * eq_w / (2 * np.pi)).astype(int)
        v_equi = ((phi + np.pi / 2) * eq_h / np.pi).astype(int)

        valid = (u_equi >= 0) & (u_equi < eq_w) & (v_equi >= 0) & (v_equi < eq_h)
        face = np.zeros((face_size, face_size, 3), dtype=np.uint8)
        face[valid] = equirect[v_equi[valid], u_equi[valid]]
        cubemap[y:y + face_size, x:x + face_size] = face

    return cubemap


def process_video(video):
    """Main processing function for the Gradio interface.

    Args:
        video: Path of the uploaded video (or None).

    Returns:
        Tuple (equirect_path, cubemap_path, status_message); image paths
        are None on failure.
    """
    try:
        if video is None:
            return None, None, "Please upload a video file."

        video_path = video
        if not os.path.exists(video_path):
            return None, None, "Error: Video file not found."

        # Log the working directory and file permission
        logger.info(f"Working directory: {os.getcwd()}")
        logger.info(f"Video path exists: {os.path.exists(video_path)}")
        logger.info(f"Video path permissions: {oct(os.stat(video_path).st_mode)[-3:]}")

        # Extract frames
        frames = extract_frames(video_path, num_frames=24)
        if not frames:
            return None, None, "Error: No frames could be extracted from the video."

        # Create panorama
        equirect = create_360_panorama(frames)
        logger.info("Created equirectangular panorama")

        # Create cubemap
        cubemap = equirect_to_cubemap(equirect)
        logger.info("Created cubemap")

        # Save paths
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        equirect_path = f"360_photo_{timestamp}.jpg"
        cubemap_path = f"cubemap_{timestamp}.jpg"

        # Save equirectangular image
        logger.info("Saving equirectangular image...")
        cv2.imwrite(equirect_path, equirect)

        # Add metadata to equirectangular image (in-place rewrite is safe:
        # write_xmp_to_jpg reads the whole file before writing)
        height, width = equirect.shape[:2]
        write_xmp_to_jpg(equirect_path, equirect_path, width, height)
        logger.info("Added 360 metadata to equirectangular image")

        # Save cubemap
        logger.info("Saving cubemap...")
        cv2.imwrite(cubemap_path, cubemap)

        return equirect_path, cubemap_path, "Processing completed successfully!"
    except Exception as e:
        logger.error(f"Error in process_video: {str(e)}")
        return None, None, f"Error during processing: {str(e)}"


# Create Gradio interface
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Upload 360° Video"),
    outputs=[
        gr.Image(label="360° Photo (with metadata)"),
        gr.Image(label="Cubemap View"),
        gr.Textbox(label="Status"),
    ],
    title="360° Video to Photo Converter",
    description="""
    Upload a 360° panoramic video (shot with iPhone wide-angle lens) to convert it into:
    1. 360° Photo with proper metadata (can be viewed in Google Photos, Facebook, etc.)
    2. Cubemap view

    Tips for best results:
    - Keep video length under 30 seconds
    - Ensure steady camera motion
    - Video should complete a full 360° rotation
    - Maintain consistent camera height
    - Good lighting conditions help with stitching
    """,
    flagging_mode="never",
)

# Launch with queue
if __name__ == "__main__":
    iface.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
    )