nakas's picture
Update app.py
77f33ba verified
raw
history blame
10.5 kB
import gradio as gr
import cv2
import numpy as np
import os
import gc
from tqdm import tqdm
import logging
from PIL import Image
from datetime import datetime
import struct
# Set up logging: configure the root logger at INFO level and create the
# module-level logger used by the functions below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def create_xmp_block(width, height):
    """Build the XMP packet that marks an image as an equirectangular panorama.

    The packet declares Google's GPano properties for an uncropped panorama:
    the full and cropped sizes are both ``width`` x ``height`` and the crop
    offsets are zero.

    Args:
        width: Panorama width in pixels.
        height: Panorama height in pixels.

    Returns:
        The XMP packet as a string; lines are separated by ``\\n`` and there
        is no trailing newline.
    """
    # Attributes of the single rdf:Description element (self-closed on the
    # last attribute line).
    gpano_attributes = [
        'xmlns:GPano="http://ns.google.com/photos/1.0/panorama/"',
        'GPano:ProjectionType="equirectangular"',
        'GPano:UsePanoramaViewer="True"',
        f'GPano:FullPanoWidthPixels="{width}"',
        f'GPano:FullPanoHeightPixels="{height}"',
        f'GPano:CroppedAreaImageWidthPixels="{width}"',
        f'GPano:CroppedAreaImageHeightPixels="{height}"',
        'GPano:CroppedAreaLeftPixels="0"',
        'GPano:CroppedAreaTopPixels="0"/>',
    ]
    packet_lines = [
        '<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>',
        '<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="ExifTool">',
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">',
        '<rdf:Description rdf:about=""',
        *gpano_attributes,
        '</rdf:RDF>',
        '</x:xmpmeta>',
        '<?xpacket end="w"?>',
    ]
    return "\n".join(packet_lines)
def write_xmp_to_jpg(input_path, output_path, width, height):
    """Embed panorama XMP metadata into a JPEG file as an APP1 segment.

    The whole input file is read into memory before the output is opened, so
    ``input_path`` and ``output_path`` may be the same file.

    The XMP segment is inserted after any leading APP0 (JFIF/JFXX) and Exif
    APP1 segments rather than directly after the SOI marker, because those
    segments are expected to come first in the file.

    Args:
        input_path: Path of the JPEG to read.
        output_path: Path the tagged JPEG is written to.
        width: Panorama width in pixels, recorded in the XMP packet.
        height: Panorama height in pixels, recorded in the XMP packet.

    Raises:
        ValueError: If the input does not start with the JPEG SOI marker, or
            if the XMP packet does not fit in a single APP1 segment.
    """
    with open(input_path, 'rb') as f:
        data = f.read()

    # Every JPEG starts with the SOI marker.
    if data[0:2] != b'\xFF\xD8':
        raise ValueError("Not a valid JPEG file")

    # APP1 payload: XMP namespace identifier followed by the UTF-8 packet.
    xmp_header = b'http://ns.adobe.com/xap/1.0/\x00'
    payload = xmp_header + create_xmp_block(width, height).encode('utf-8')

    # The segment length field is 16 bits and counts its own two bytes.
    segment_length = len(payload) + 2
    if segment_length > 0xFFFF:
        raise ValueError("XMP packet is too large for a single APP1 segment")
    xmp_segment = b'\xFF\xE1' + struct.pack('>H', segment_length) + payload

    # Walk past leading APP0 and Exif APP1 segments; stop at the first other
    # marker or at anything that looks malformed.
    insert_at = 2
    while insert_at + 4 <= len(data) and data[insert_at] == 0xFF:
        marker = data[insert_at + 1]
        (declared_length,) = struct.unpack_from('>H', data, insert_at + 2)
        segment_end = insert_at + 2 + declared_length
        is_app0 = marker == 0xE0
        is_exif = (
            marker == 0xE1
            and data[insert_at + 4:insert_at + 10] == b'Exif\x00\x00'
        )
        if not (is_app0 or is_exif):
            break
        if declared_length < 2 or segment_end > len(data):
            break
        insert_at = segment_end

    with open(output_path, 'wb') as f:
        f.write(data[:insert_at] + xmp_segment + data[insert_at:])
def preprocess_frame(frame):
    """Resize a frame to 1080 px height and enhance its local contrast.

    The frame is scaled to a height of 1080 pixels with the width following
    the source aspect ratio. CLAHE is then applied to the L channel of the
    LAB representation and the result is converted back to BGR.

    Args:
        frame: Image array; the colour conversions used treat it as BGR.

    Returns:
        The resized, contrast-enhanced BGR image.
    """
    out_height = 1080
    src_height, src_width = frame.shape[0], frame.shape[1]
    out_width = int(out_height * (src_width / src_height))
    resized = cv2.resize(frame, (out_width, out_height))

    # Equalize only lightness so the colour channels are left untouched.
    lightness, chroma_a, chroma_b = cv2.split(
        cv2.cvtColor(resized, cv2.COLOR_BGR2LAB)
    )
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    boosted = cv2.merge((equalizer.apply(lightness), chroma_a, chroma_b))
    return cv2.cvtColor(boosted, cv2.COLOR_LAB2BGR)
def extract_frames(video_path, num_frames=24):
    """Sample evenly spaced frames from a video and preprocess each one.

    Frame positions are spread linearly from the first to the last frame.
    When the video has fewer frames than ``num_frames`` the positions are
    de-duplicated, so no frame is returned twice.

    Args:
        video_path: Path of the video file to read.
        num_frames: Maximum number of frames to sample.

    Returns:
        A list of preprocessed frames; frames that could not be read are
        skipped, and the list is empty when the video reports no frames.

    Raises:
        Exception: If the video cannot be opened or reading fails; the
            original error is attached as the cause.
    """
    cap = None
    try:
        logger.info(f"Opening video: {video_path}")
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise Exception("Could not open video file")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            # Nothing sensible to seek to; let the caller report "no frames".
            logger.warning(f"Video reports {total_frames} frames")
            return []

        # np.unique removes the repeated indices that linspace produces when
        # the video is shorter than num_frames.
        frame_indices = np.unique(
            np.linspace(0, total_frames - 1, num_frames, dtype=int)
        )

        frames = []
        for idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if ret:
                frames.append(preprocess_frame(frame))
                # Kept from the original: collect eagerly after each frame.
                gc.collect()

        logger.info(f"Extracted {len(frames)} frames")
        return frames
    except Exception as e:
        raise Exception(f"Frame extraction failed: {str(e)}") from e
    finally:
        # Always release the capture handle, on success and on failure.
        if cap is not None:
            cap.release()
def create_360_panorama(frames, vertical_fov=120):
    """Stitch frames into a 2160x1080 equirectangular image with vertical padding.

    The frames are stitched with OpenCV's panorama stitcher. Because the
    source covers only ``vertical_fov`` degrees of the 180 degrees an
    equirectangular image spans, the stitched result is resized into a
    centred band and the rows above and below are left black, with a short
    fade next to the band to avoid a hard edge.

    Args:
        frames: List of at least two images to stitch.
        vertical_fov: Vertical field of view of the source in degrees, in
            the range (0, 180]. Defaults to 120.

    Returns:
        A ``(1080, 2160, 3)`` uint8 image.

    Raises:
        Exception: If fewer than two frames are given, ``vertical_fov`` is
            out of range, or stitching fails; the original error is attached
            as the cause.
    """
    try:
        if len(frames) < 2:
            raise Exception("Need at least 2 frames")

        total_vertical_fov = 180  # full equirectangular height in degrees
        if not 0 < vertical_fov <= total_vertical_fov:
            raise Exception(
                f"vertical_fov must be in (0, {total_vertical_fov}] degrees"
            )
        # Fraction of the output height left empty on each side of the band.
        padding_ratio = (total_vertical_fov - vertical_fov) / (2 * total_vertical_fov)

        stitcher = cv2.Stitcher.create(cv2.Stitcher_PANORAMA)
        stitcher.setPanoConfidenceThresh(0.8)
        logger.info("Starting panorama stitching...")
        status, panorama = stitcher.stitch(frames)
        if status != cv2.Stitcher_OK:
            raise Exception(f"Stitching failed with status {status}")

        target_height = 1080
        target_width = target_height * 2  # 2:1 aspect ratio for equirectangular

        # Derive the band height from the padding so the two always add up to
        # target_height, regardless of float truncation.
        pad_pixels = int(target_height * padding_ratio)
        content_height = target_height - 2 * pad_pixels
        panorama = cv2.resize(panorama, (target_width, content_height))

        final_panorama = np.zeros((target_height, target_width, 3), dtype=np.uint8)
        band_end = pad_pixels + content_height
        final_panorama[pad_pixels:band_end, :] = panorama

        # Fade the first/last panorama row into the black padding.
        feather_size = int(pad_pixels * 0.3)
        if feather_size > 0:
            alpha = (np.arange(feather_size) / feather_size)[:, np.newaxis, np.newaxis]
            # Top: brightness ramps up towards the band.
            final_panorama[pad_pixels - feather_size:pad_pixels, :] = \
                (panorama[0, :] * alpha).astype(np.uint8)
            # Bottom: brightness ramps down away from the band.
            final_panorama[band_end:band_end + feather_size, :] = \
                (panorama[-1, :] * (1 - alpha)).astype(np.uint8)

        logger.info(f"Created panorama of size {final_panorama.shape} with vertical FOV adjustment")
        return final_panorama
    except Exception as e:
        raise Exception(f"360° panorama creation failed: {str(e)}") from e
def equirect_to_cubemap(equirect):
    """Convert an equirectangular image into six cube faces on one canvas.

    Each face is ``height // 2`` pixels square. The canvas is three faces
    tall and four faces wide; front, right, back and left fill the top row,
    top and bottom fill the first two cells of the second row, and the rest
    of the canvas stays black.

    Sampling is nearest-pixel. Lookups that land exactly on the right edge
    wrap to column 0 and lookups exactly on the bottom edge are clamped to
    the last row, so seam and pole pixels are not left black.

    Args:
        equirect: Equirectangular image array (height x width x channels).

    Returns:
        A ``(3 * face, 4 * face, 3)`` uint8 array holding the faces.
    """
    height, width = equirect.shape[:2]
    face_size = height // 2
    cubemap = np.zeros((face_size * 3, face_size * 4, 3), dtype=np.uint8)
    if face_size == 0:
        return cubemap

    rotations = [
        (0, 0, 0),    # front
        (0, 90, 0),   # right
        (0, 180, 0),  # back
        (0, 270, 0),  # left
        (-90, 0, 0),  # top
        (90, 0, 0),   # bottom
    ]

    # Ray through every face pixel on the z = 1 plane, shared by all faces.
    # meshgrid gives x_3d[v, u] = coords[u] and y_3d[v, u] = coords[v].
    coords = 2 * np.arange(face_size) / face_size - 1
    x_3d, y_3d = np.meshgrid(coords, coords)
    rays = np.stack((x_3d, y_3d, np.ones_like(x_3d)), axis=-1)

    for i, rotation in enumerate(rotations):
        x = (i % 4) * face_size
        y = (i // 4) * face_size
        R = cv2.Rodrigues(np.array(rotation, dtype=np.float64) * np.pi / 180)[0]

        # rays @ R.T applies R to every ray at once (row-vector form of R.dot).
        points = rays @ R.T
        theta = np.arctan2(points[..., 0], points[..., 2])
        phi = np.arctan2(
            points[..., 1],
            np.sqrt(points[..., 0] ** 2 + points[..., 2] ** 2),
        )

        u_equi = ((theta + np.pi) * width / (2 * np.pi)).astype(int)
        v_equi = ((phi + np.pi / 2) * height / np.pi).astype(int)
        u_equi %= width                           # theta == pi wraps to column 0
        v_equi = np.clip(v_equi, 0, height - 1)   # phi == pi/2 maps to last row

        samples = equirect[v_equi, u_equi]
        if samples.ndim == 2:
            # Single-channel input: replicate across the three output channels.
            samples = samples[..., np.newaxis]
        cubemap[y:y + face_size, x:x + face_size] = samples

    return cubemap
def process_video(video):
    """Main processing function for the Gradio interface.

    Extracts frames from the uploaded video, stitches them into an
    equirectangular panorama, derives a cubemap, and writes both as JPEG
    files in the current working directory. The equirectangular file also
    gets panorama XMP metadata.

    Args:
        video: Path of the uploaded video file, or ``None`` if nothing was
            uploaded.

    Returns:
        A tuple ``(equirect_path, cubemap_path, status_message)``. On any
        failure both paths are ``None`` and the message describes the error;
        no exception escapes this function.
    """
    try:
        if video is None:
            return None, None, "Please upload a video file."
        video_path = video
        if not os.path.exists(video_path):
            return None, None, "Error: Video file not found."

        # Log the working directory and file permission
        logger.info(f"Working directory: {os.getcwd()}")
        logger.info(f"Video path exists: {os.path.exists(video_path)}")
        logger.info(f"Video path permissions: {oct(os.stat(video_path).st_mode)[-3:]}")

        # Extract frames
        frames = extract_frames(video_path, num_frames=24)
        if not frames:
            return None, None, "Error: No frames could be extracted from the video."

        # Create panorama
        equirect = create_360_panorama(frames)
        logger.info("Created equirectangular panorama")

        # Create cubemap
        cubemap = equirect_to_cubemap(equirect)
        logger.info("Created cubemap")

        # Save paths
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        equirect_path = f"360_photo_{timestamp}.jpg"
        cubemap_path = f"cubemap_{timestamp}.jpg"

        # Save equirectangular image. cv2.imwrite signals failure through its
        # return value, so check it instead of failing later on a missing file.
        logger.info("Saving equirectangular image...")
        if not cv2.imwrite(equirect_path, equirect):
            raise Exception(f"Could not write {equirect_path}")

        # Add metadata to equirectangular image (rewritten in place)
        height, width = equirect.shape[:2]
        write_xmp_to_jpg(equirect_path, equirect_path, width, height)
        logger.info("Added 360 metadata to equirectangular image")

        # Save cubemap
        logger.info("Saving cubemap...")
        if not cv2.imwrite(cubemap_path, cubemap):
            raise Exception(f"Could not write {cubemap_path}")

        return equirect_path, cubemap_path, "Processing completed successfully!"
    except Exception as e:
        # logger.exception keeps the traceback in the log; the UI only gets
        # the message.
        logger.exception(f"Error in process_video: {str(e)}")
        return None, None, f"Error during processing: {str(e)}"
# Build the Gradio UI: one video input, two image outputs and a status box.
# Components are created in the same order as before (input first, then the
# outputs) and then handed to the Interface.
_video_input = gr.Video(label="Upload 360° Video")
_result_outputs = [
    gr.Image(label="360° Photo (with metadata)"),
    gr.Image(label="Cubemap View"),
    gr.Textbox(label="Status"),
]
_app_description = """
Upload a 360° panoramic video (shot with iPhone wide-angle lens) to convert it into:
1. 360° Photo with proper metadata (can be viewed in Google Photos, Facebook, etc.)
2. Cubemap view
Tips for best results:
- Keep video length under 30 seconds
- Ensure steady camera motion
- Video should complete a full 360° rotation
- Maintain consistent camera height
- Good lighting conditions help with stitching
"""
iface = gr.Interface(
    fn=process_video,
    inputs=_video_input,
    outputs=_result_outputs,
    title="360° Video to Photo Converter",
    description=_app_description,
    flagging_mode="never",
)

# When run as a script, enable the request queue and serve on port 7860,
# bound to 0.0.0.0.
if __name__ == "__main__":
    _launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    iface.queue().launch(**_launch_options)