Spaces:
Sleeping
Sleeping
File size: 10,453 Bytes
941c846 77f33ba 941c846 77f33ba efe5371 77f33ba efe5371 941c846 77f33ba 941c846 77f33ba 941c846 77f33ba 941c846 77f33ba 941c846 77f33ba 941c846 77f33ba 941c846 77f33ba 941c846 77f33ba 941c846 77f33ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 |
import gradio as gr
import cv2
import numpy as np
import os
import gc
from tqdm import tqdm
import logging
from PIL import Image
from datetime import datetime
import struct
# Set up logging: configure the root logger to emit INFO and above, then
# create the module-level logger used by every function in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def create_xmp_block(width, height):
    """Build the GPano XMP packet text for a full, uncropped panorama.

    The same ``width``/``height`` values are written to both the
    ``FullPano*`` and ``CroppedAreaImage*`` attributes, and the crop
    offsets are fixed at 0.

    Args:
        width: Panorama width in pixels.
        height: Panorama height in pixels.

    Returns:
        The XMP packet as a ``str``, lines separated by ``\\n`` with no
        trailing newline.
    """
    packet_lines = [
        '<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>',
        '<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="ExifTool">',
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">',
        '<rdf:Description rdf:about=""',
        'xmlns:GPano="http://ns.google.com/photos/1.0/panorama/"',
        'GPano:ProjectionType="equirectangular"',
        'GPano:UsePanoramaViewer="True"',
        f'GPano:FullPanoWidthPixels="{width}"',
        f'GPano:FullPanoHeightPixels="{height}"',
        f'GPano:CroppedAreaImageWidthPixels="{width}"',
        f'GPano:CroppedAreaImageHeightPixels="{height}"',
        'GPano:CroppedAreaLeftPixels="0"',
        'GPano:CroppedAreaTopPixels="0"/>',
        '</rdf:RDF>',
        '</x:xmpmeta>',
        '<?xpacket end="w"?>',
    ]
    return "\n".join(packet_lines)
def write_xmp_to_jpg(input_path, output_path, width, height):
    """Embed a GPano XMP packet into a JPEG as an APP1 segment.

    The whole input file is read into memory before the output is opened,
    so ``input_path`` and ``output_path`` may be the same file.

    The XMP segment is inserted after any leading JFIF (APP0) and Exif
    (APP1) segments rather than directly after SOI, because both JFIF and
    Exif require their own segment to be the first one in the file.

    Args:
        input_path: Path of the JPEG to read.
        output_path: Path the tagged JPEG is written to.
        width: Panorama width in pixels, recorded in the XMP packet.
        height: Panorama height in pixels, recorded in the XMP packet.

    Raises:
        ValueError: If the input does not start with the JPEG SOI marker,
            or the XMP packet does not fit in a single APP1 segment.
    """
    with open(input_path, 'rb') as f:
        data = f.read()

    # Every JPEG starts with the SOI marker FF D8.
    if data[0:2] != b'\xFF\xD8':
        raise ValueError("Not a valid JPEG file")

    # APP1 payload = XMP namespace signature + UTF-8 packet.
    xmp_header = b'http://ns.adobe.com/xap/1.0/\x00'
    xmp_bytes = create_xmp_block(width, height).encode('utf-8')

    # The 16-bit length field counts itself (2 bytes) but not the marker.
    length = len(xmp_header) + len(xmp_bytes) + 2
    if length > 0xFFFF:
        raise ValueError("XMP packet too large for a single APP1 segment")
    xmp_segment = b'\xFF\xE1' + struct.pack('>H', length) + xmp_header + xmp_bytes

    # Walk past leading APP0 and Exif APP1 segments so they stay first.
    insert_at = 2
    while insert_at + 4 <= len(data) and data[insert_at] == 0xFF:
        marker = data[insert_at + 1]
        (segment_length,) = struct.unpack_from('>H', data, insert_at + 2)
        body_start = insert_at + 4
        segment_end = insert_at + 2 + segment_length
        is_app0 = marker == 0xE0
        is_exif = (
            marker == 0xE1
            and data[body_start:body_start + 6] == b'Exif\x00\x00'
        )
        if not (is_app0 or is_exif):
            break
        if segment_length < 2 or segment_end > len(data):
            # Malformed length: stop here instead of splitting a segment.
            break
        insert_at = segment_end

    with open(output_path, 'wb') as f:
        f.write(data[:insert_at] + xmp_segment + data[insert_at:])
def preprocess_frame(frame):
    """Resize a frame to 1080 px tall and boost its local contrast.

    The frame is scaled to a height of 1080 while keeping its aspect
    ratio, converted from BGR to LAB, CLAHE is applied to the L channel
    only, and the result is converted back to BGR.

    Args:
        frame: Image array; it is passed to ``cv2.COLOR_BGR2LAB``, so it is
            treated as BGR.

    Returns:
        The resized, contrast-enhanced BGR image.
    """
    src_height, src_width = frame.shape[0], frame.shape[1]
    out_height = 1080
    out_width = int(out_height * (src_width / src_height))
    resized = cv2.resize(frame, (out_width, out_height))

    # Equalize lightness only so the colour channels are left untouched.
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(resized, cv2.COLOR_BGR2LAB))
    boosted_lab = cv2.merge((equalizer.apply(lightness), chan_a, chan_b))
    return cv2.cvtColor(boosted_lab, cv2.COLOR_LAB2BGR)
def extract_frames(video_path, num_frames=24):
    """Sample evenly spaced frames from a video and preprocess each one.

    Frame indices are spread linearly from the first to the last frame.
    Indices are de-duplicated, so a clip with fewer than ``num_frames``
    frames yields each frame at most once. Frames that fail to decode are
    skipped.

    Args:
        video_path: Path to the video file, passed to ``cv2.VideoCapture``.
        num_frames: Number of sample positions to request.

    Returns:
        A list of preprocessed frames; it may be shorter than
        ``num_frames``.

    Raises:
        Exception: With the message prefix ``"Frame extraction failed: "``
            if the video cannot be opened, reports no frames, or any step
            fails. The original error is attached as ``__cause__``.
    """
    cap = None
    try:
        logger.info(f"Opening video: {video_path}")
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise Exception("Could not open video file")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            # linspace(0, -1, ...) would otherwise produce negative indices.
            raise Exception("Video reports no frames")

        # np.unique drops repeated indices that arise from integer
        # truncation when the clip is shorter than num_frames.
        frame_indices = np.unique(
            np.linspace(0, total_frames - 1, num_frames, dtype=int)
        )

        frames = []
        for idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if ret:
                frames.append(preprocess_frame(frame))
            gc.collect()

        logger.info(f"Extracted {len(frames)} frames")
        return frames
    except Exception as e:
        raise Exception(f"Frame extraction failed: {str(e)}") from e
    finally:
        # Release the capture on success and on failure.
        if cap is not None:
            cap.release()
def create_360_panorama(frames, vertical_fov=120, target_height=1080):
    """Stitch frames into a 2:1 canvas, padded for limited vertical FOV.

    The frames are stitched with OpenCV's panorama stitcher and resized to
    fill a horizontal band of the output canvas. The band covers
    ``vertical_fov`` out of 180 degrees of the canvas height; the rest is
    black padding above and below. The first and last rows of the band are
    faded into the padding over 30% of the padding height.

    Args:
        frames: Sequence of at least two images to stitch.
        vertical_fov: Vertical field of view in degrees that the stitched
            content is assumed to cover. The default of 120 is the original
            author's estimate for an iPhone wide-angle lens.
        target_height: Output height in pixels; the width is twice this.

    Returns:
        A ``uint8`` array of shape ``(target_height, 2 * target_height, 3)``.

    Raises:
        Exception: With the message prefix
            ``"360° panorama creation failed: "`` if there are fewer than
            two frames, the FOV is out of range, or stitching fails.
    """
    try:
        if len(frames) < 2:
            raise Exception("Need at least 2 frames")

        total_vertical_fov = 180  # full equirectangular height in degrees
        if not 0 < vertical_fov <= total_vertical_fov:
            raise Exception("vertical_fov must be in (0, 180] degrees")

        # Fraction of the canvas height left empty at EACH of top and bottom.
        padding_ratio = (total_vertical_fov - vertical_fov) / (2 * total_vertical_fov)

        stitcher = cv2.Stitcher.create(cv2.Stitcher_PANORAMA)
        stitcher.setPanoConfidenceThresh(0.8)
        logger.info("Starting panorama stitching...")
        status, panorama = stitcher.stitch(frames)
        if status != cv2.Stitcher_OK:
            raise Exception(f"Stitching failed with status {status}")

        target_width = target_height * 2  # 2:1 aspect ratio for equirectangular

        # Derive the band height from the integer padding so the resized
        # image always fits the slot it is copied into. Truncating two
        # separate float products can disagree by one pixel.
        pad_pixels = int(target_height * padding_ratio)
        content_height = target_height - 2 * pad_pixels
        panorama = cv2.resize(panorama, (target_width, content_height))

        final_panorama = np.zeros((target_height, target_width, 3), dtype=np.uint8)
        final_panorama[pad_pixels:target_height - pad_pixels, :] = panorama

        # Fade the edge rows into the padding to avoid a hard transition.
        feather_size = int(pad_pixels * 0.3)
        if feather_size > 0:
            alphas = (np.arange(feather_size) / feather_size)[:, None, None]
            top_start = pad_pixels - feather_size
            # Top: ramp from black up to the first content row.
            final_panorama[top_start:pad_pixels, :] = (
                panorama[0][None] * alphas
            ).astype(np.uint8)
            bottom_start = target_height - pad_pixels
            # Bottom: ramp from the last content row down to black.
            final_panorama[bottom_start:bottom_start + feather_size, :] = (
                panorama[-1][None] * (1 - alphas)
            ).astype(np.uint8)

        logger.info(f"Created panorama of size {final_panorama.shape} with vertical FOV adjustment")
        return final_panorama
    except Exception as e:
        raise Exception(f"360° panorama creation failed: {str(e)}") from e
def _rotation_from_rvec(rvec):
    """Return the 3x3 rotation matrix for an axis-angle vector (Rodrigues)."""
    angle = float(np.linalg.norm(rvec))
    if angle == 0.0:
        return np.eye(3)
    kx, ky, kz = rvec / angle
    skew = np.array([
        [0.0, -kz, ky],
        [kz, 0.0, -kx],
        [-ky, kx, 0.0],
    ])
    return np.eye(3) + np.sin(angle) * skew + (1.0 - np.cos(angle)) * (skew @ skew)


def equirect_to_cubemap(equirect):
    """Resample an equirectangular image into six cube faces on one canvas.

    Each face is ``face_size = height // 2`` pixels square. The canvas is
    ``3 * face_size`` rows by ``4 * face_size`` columns. Faces 0-3 fill the
    first row, faces 4-5 fill the first two cells of the second row, and
    the remaining cells stay black. Sampling is nearest-neighbour by
    truncation.

    Longitude indices wrap around and latitude indices are clamped, so
    pixels that land exactly on the seam or a pole are still filled.

    Args:
        equirect: ``uint8`` image array of shape ``(H, W, 3)`` or ``(H, W)``.

    Returns:
        A ``uint8`` array of shape ``(3 * face_size, 4 * face_size, 3)``.
    """
    height, width = equirect.shape[:2]
    face_size = height // 2
    cubemap = np.zeros((face_size * 3, face_size * 4, 3), dtype=np.uint8)
    if face_size == 0 or width == 0:
        return cubemap

    # Axis-angle rotations in degrees; labels are kept from the original.
    # NOTE(review): image rows grow downward here, so the -90 degree entry
    # samples the lower rows of the source - confirm the intended labels.
    rotations = [
        (0, 0, 0),    # front
        (0, 90, 0),   # right
        (0, 180, 0),  # back
        (0, 270, 0),  # left
        (-90, 0, 0),  # top
        (90, 0, 0),   # bottom
    ]

    # One viewing ray per face pixel on the z = 1 plane, x and y in [-1, 1).
    coords = 2 * np.arange(face_size) / face_size - 1
    x_3d, y_3d = np.meshgrid(coords, coords)  # x varies by column, y by row
    rays = np.stack((x_3d, y_3d, np.ones_like(x_3d)), axis=-1)

    for i, rotation in enumerate(rotations):
        x = (i % 4) * face_size
        y = (i // 4) * face_size
        R = _rotation_from_rvec(np.array(rotation, dtype=float) * np.pi / 180)

        points = rays @ R.T  # points[v, u] == R @ rays[v, u]
        px, py, pz = points[..., 0], points[..., 1], points[..., 2]
        theta = np.arctan2(px, pz)
        phi = np.arctan2(py, np.sqrt(px ** 2 + pz ** 2))

        # theta == pi maps to column `width`; wrap it back to column 0.
        u_equi = ((theta + np.pi) * width / (2 * np.pi)).astype(int) % width
        # phi == pi/2 maps to row `height`; clamp it to the last row.
        v_equi = np.clip(
            ((phi + np.pi / 2) * height / np.pi).astype(int), 0, height - 1
        )

        sampled = equirect[v_equi, u_equi]
        if sampled.ndim == 2:
            # Single-channel input: replicate across the three channels.
            sampled = sampled[..., None]
        cubemap[y:y + face_size, x:x + face_size] = sampled

    return cubemap
def process_video(video):
    """Convert an uploaded video into a tagged 360 photo and a cubemap.

    This is the Gradio callback. It extracts frames, stitches them into an
    equirectangular panorama, derives a cubemap, writes both as JPEGs in
    the current working directory, and embeds GPano XMP metadata in the
    panorama file.

    Args:
        video: Path to the uploaded video file, or ``None`` if nothing was
            uploaded.

    Returns:
        A tuple ``(equirect_path, cubemap_path, status_message)``. On any
        failure both paths are ``None`` and the message describes the
        error; exceptions are not propagated to the caller.
    """
    try:
        if video is None:
            return None, None, "Please upload a video file."

        video_path = video
        if not os.path.exists(video_path):
            return None, None, "Error: Video file not found."

        # Log the working directory and file permissions for diagnostics.
        logger.info(f"Working directory: {os.getcwd()}")
        logger.info(f"Video path exists: {os.path.exists(video_path)}")
        logger.info(f"Video path permissions: {oct(os.stat(video_path).st_mode)[-3:]}")

        frames = extract_frames(video_path, num_frames=24)
        if not frames:
            return None, None, "Error: No frames could be extracted from the video."

        equirect = create_360_panorama(frames)
        logger.info("Created equirectangular panorama")

        cubemap = equirect_to_cubemap(equirect)
        logger.info("Created cubemap")

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        equirect_path = f"360_photo_{timestamp}.jpg"
        cubemap_path = f"cubemap_{timestamp}.jpg"

        # cv2.imwrite reports failure through its return value, so check it
        # instead of assuming the file exists afterwards.
        logger.info("Saving equirectangular image...")
        if not cv2.imwrite(equirect_path, equirect):
            raise IOError(f"Could not write {equirect_path}")

        # Rewrite the saved JPEG in place with the 360 metadata added.
        height, width = equirect.shape[:2]
        write_xmp_to_jpg(equirect_path, equirect_path, width, height)
        logger.info("Added 360 metadata to equirectangular image")

        logger.info("Saving cubemap...")
        if not cv2.imwrite(cubemap_path, cubemap):
            raise IOError(f"Could not write {cubemap_path}")

        return equirect_path, cubemap_path, "Processing completed successfully!"
    except Exception as e:
        # Top-level UI boundary: log with traceback, return a status string.
        logger.exception(f"Error in process_video: {str(e)}")
        return None, None, f"Error during processing: {str(e)}"
# Create Gradio interface: a single video input wired to process_video.
# The three outputs below receive process_video's return tuple in order:
# (equirectangular image path, cubemap image path, status message).
iface = gr.Interface(
fn=process_video,
inputs=gr.Video(label="Upload 360° Video"),
outputs=[
gr.Image(label="360° Photo (with metadata)"),
gr.Image(label="Cubemap View"),
gr.Textbox(label="Status")
],
title="360° Video to Photo Converter",
# User-facing help text shown under the title.
description="""
Upload a 360° panoramic video (shot with iPhone wide-angle lens) to convert it into:
1. 360° Photo with proper metadata (can be viewed in Google Photos, Facebook, etc.)
2. Cubemap view
Tips for best results:
- Keep video length under 30 seconds
- Ensure steady camera motion
- Video should complete a full 360° rotation
- Maintain consistent camera height
- Good lighting conditions help with stitching
""",
flagging_mode="never"
)
# Launch with queue: when run as a script, enable Gradio's request queue and
# serve on all network interfaces (0.0.0.0) at port 7860.
if __name__ == "__main__":
    iface.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
    )