Spaces:

nakas
/

360_metadata_image_injector

Running

App Files Files Community

360_metadata_image_injector / app.py

nakas

Update app.py

77f33ba verified 9 months ago

raw

history blame

10.5 kB

	import gradio as gr
	import cv2
	import numpy as np
	import os
	import gc
	from tqdm import tqdm
	import logging
	from PIL import Image
	from datetime import datetime
	import struct

	# Configure the root logger at INFO level and create this module's logger,
	# which the processing functions below use for progress and error messages.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	def create_xmp_block(width, height):
	    """Build the XMP packet that tags an image as an equirectangular panorama.

	    The packet declares the GPano namespace and describes a full, uncropped
	    panorama: the cropped-area size equals the full size and the crop offsets
	    are zero.

	    Args:
	        width: Panorama width in pixels.
	        height: Panorama height in pixels.

	    Returns:
	        The XMP packet as a ``str`` (no trailing newline). The caller is
	        responsible for encoding it as UTF-8.
	    """
	    # The xpacket "begin" attribute must contain U+FEFF. It is written as an
	    # escape so that it cannot be corrupted into the three-character mojibake
	    # "ï»¿", which would encode to six wrong bytes instead of EF BB BF.
	    return (
	        '<?xpacket begin="\ufeff" id="W5M0MpCehiHzreSzNTczkc9d"?>\n'
	        '<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="ExifTool">\n'
	        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">\n'
	        '<rdf:Description rdf:about=""\n'
	        'xmlns:GPano="http://ns.google.com/photos/1.0/panorama/"\n'
	        'GPano:ProjectionType="equirectangular"\n'
	        'GPano:UsePanoramaViewer="True"\n'
	        f'GPano:FullPanoWidthPixels="{width}"\n'
	        f'GPano:FullPanoHeightPixels="{height}"\n'
	        f'GPano:CroppedAreaImageWidthPixels="{width}"\n'
	        f'GPano:CroppedAreaImageHeightPixels="{height}"\n'
	        'GPano:CroppedAreaLeftPixels="0"\n'
	        'GPano:CroppedAreaTopPixels="0"/>\n'
	        '</rdf:RDF>\n'
	        '</x:xmpmeta>\n'
	        '<?xpacket end="w"?>'
	    )

	def write_xmp_to_jpg(input_path, output_path, width, height):
	    """Copy a JPEG to ``output_path`` with a GPano XMP APP1 segment added.

	    The input is read completely before anything is written, so
	    ``input_path`` and ``output_path`` may be the same file.

	    Args:
	        input_path: Path of the JPEG to read.
	        output_path: Path the tagged JPEG is written to.
	        width: Panorama width in pixels, passed to ``create_xmp_block``.
	        height: Panorama height in pixels, passed to ``create_xmp_block``.

	    Raises:
	        ValueError: If the input does not start with the JPEG SOI marker, or
	            if the XMP payload does not fit in a single APP1 segment.
	    """
	    with open(input_path, 'rb') as f:
	        data = f.read()

	    # Every JPEG starts with the SOI marker FF D8.
	    if data[0:2] != b'\xFF\xD8':
	        raise ValueError("Not a valid JPEG file")

	    # APP1 payload: XMP namespace identifier (NUL-terminated) + UTF-8 packet.
	    xmp_header = b'http://ns.adobe.com/xap/1.0/\x00'
	    xmp_bytes = create_xmp_block(width, height).encode('utf-8')

	    # The 16-bit segment length counts its own two bytes but not the marker.
	    length = 2 + len(xmp_header) + len(xmp_bytes)
	    if length > 0xFFFF:
	        raise ValueError("XMP packet too large for a single APP1 segment")
	    segment = b'\xFF\xE1' + struct.pack('>H', length) + xmp_header + xmp_bytes

	    # JFIF requires its APP0 segment to follow SOI directly, so skip any
	    # leading APP0 (JFIF/JFXX) segments and insert the XMP segment after them.
	    insert_at = 2
	    while data[insert_at:insert_at + 2] == b'\xFF\xE0':
	        if insert_at + 4 > len(data):
	            break
	        (seg_len,) = struct.unpack_from('>H', data, insert_at + 2)
	        seg_end = insert_at + 2 + seg_len
	        if seg_len < 2 or seg_end > len(data):
	            # Malformed segment: stop scanning and insert at the last good spot.
	            break
	        insert_at = seg_end

	    with open(output_path, 'wb') as f:
	        f.write(data[:insert_at] + segment + data[insert_at:])

	def preprocess_frame(frame):
	    """Resize a frame to 1080 rows and apply CLAHE to its lightness channel.

	    The width is scaled to keep the frame's aspect ratio. Contrast is
	    equalised on the L channel of the LAB representation only, and the result
	    is converted back to BGR.

	    Args:
	        frame: Image array as returned by ``cv2.VideoCapture.read``.

	    Returns:
	        The resized, contrast-enhanced BGR image.
	    """
	    out_height = 1080
	    src_height, src_width = frame.shape[0], frame.shape[1]
	    out_width = int(out_height * (src_width / src_height))
	    resized = cv2.resize(frame, (out_width, out_height))

	    # Work in LAB so that only lightness is equalised, not colour.
	    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(resized, cv2.COLOR_BGR2LAB))
	    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
	    merged = cv2.merge((equalizer.apply(lightness), chan_a, chan_b))

	    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)

	def extract_frames(video_path, num_frames=24):
	    """Sample evenly spaced frames from a video and preprocess each one.

	    Args:
	        video_path: Path of the video file to open.
	        num_frames: Number of sample positions spread from the first to the
	            last frame. Fewer frames are returned when a read fails or when
	            the video has fewer distinct frames than requested.

	    Returns:
	        A list of frames, each passed through ``preprocess_frame``.

	    Raises:
	        Exception: With the message prefix "Frame extraction failed:" when
	            the video cannot be opened, reports no frames, or any step fails.
	            The original error is attached as ``__cause__``.
	    """
	    cap = None
	    try:
	        logger.info(f"Opening video: {video_path}")
	        cap = cv2.VideoCapture(video_path)
	        if not cap.isOpened():
	            raise Exception("Could not open video file")

	        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	        if total_frames <= 0:
	            # Without this guard linspace would produce negative seek indices.
	            raise Exception("Video reports no frames")

	        # Evenly spaced indices; np.unique drops the duplicates that appear
	        # when the clip has fewer frames than num_frames.
	        frame_indices = np.unique(
	            np.linspace(0, total_frames - 1, num_frames, dtype=int)
	        )

	        frames = []
	        for idx in frame_indices:
	            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
	            ret, frame = cap.read()
	            if ret:
	                frames.append(preprocess_frame(frame))
	            # Run a collection pass after each frame, as the original did.
	            gc.collect()

	        logger.info(f"Extracted {len(frames)} frames")
	        return frames

	    except Exception as e:
	        raise Exception(f"Frame extraction failed: {str(e)}") from e
	    finally:
	        # Always release the capture handle, on success and on failure.
	        if cap is not None:
	            cap.release()

	def create_360_panorama(frames):
	    """Stitch frames into a 2160x1080 equirectangular image with padding.

	    The stitched strip is assumed to cover 120 degrees vertically (the
	    original author's figure for an iPhone wide-angle lens). It is resized to
	    the full width, centred vertically in a black 2:1 canvas, and the edge
	    rows are faded into the black bands above and below.

	    Args:
	        frames: Sequence of at least two BGR images to stitch.

	    Returns:
	        A ``(1080, 2160, 3)`` ``uint8`` array.

	    Raises:
	        Exception: With the message prefix "360° panorama creation failed:"
	            when fewer than two frames are given, stitching fails, or any
	            later step raises. The original error is attached as
	            ``__cause__``.
	    """
	    try:
	        if len(frames) < 2:
	            raise Exception("Need at least 2 frames")

	        # iPhone wide angle is typically around 120 degrees vertical FOV;
	        # the rest of the 180-degree equirectangular height is padding.
	        vertical_fov = 120  # degrees
	        total_vertical_fov = 180  # full equirectangular height
	        padding_ratio = (total_vertical_fov - vertical_fov) / (2 * total_vertical_fov)

	        stitcher = cv2.Stitcher.create(cv2.Stitcher_PANORAMA)
	        stitcher.setPanoConfidenceThresh(0.8)

	        logger.info("Starting panorama stitching...")
	        status, panorama = stitcher.stitch(frames)
	        if status != cv2.Stitcher_OK:
	            raise Exception(f"Stitching failed with status {status}")

	        target_height = 1080
	        target_width = target_height * 2  # 2:1 aspect ratio for equirectangular

	        # Derive the strip height from the padding rather than truncating both
	        # independently, so the strip always fits its slot exactly.
	        pad_pixels = int(round(target_height * padding_ratio))
	        strip_height = target_height - 2 * pad_pixels
	        panorama = cv2.resize(panorama, (target_width, strip_height))

	        final_panorama = np.zeros((target_height, target_width, 3), dtype=np.uint8)
	        final_panorama[pad_pixels:pad_pixels + strip_height, :] = panorama

	        # Fade the first/last strip rows into the black padding to avoid a
	        # hard edge: alpha runs 0 -> 1 towards the strip at the top and
	        # 1 -> 0 away from it at the bottom.
	        feather_size = int(pad_pixels * 0.3)
	        if feather_size > 0:
	            alphas = (np.arange(feather_size) / feather_size)[:, None, None]
	            top_start = pad_pixels - feather_size
	            final_panorama[top_start:pad_pixels] = \
	                (panorama[0][None] * alphas).astype(np.uint8)
	            bottom_start = target_height - pad_pixels
	            final_panorama[bottom_start:bottom_start + feather_size] = \
	                (panorama[-1][None] * (1 - alphas)).astype(np.uint8)

	        logger.info(f"Created panorama of size {final_panorama.shape} with vertical FOV adjustment")
	        return final_panorama

	    except Exception as e:
	        raise Exception(f"360° panorama creation failed: {str(e)}") from e

	def equirect_to_cubemap(equirect):
	    """Convert an equirectangular image to a cubemap sheet.

	    Each face is ``H // 2`` pixels square, where ``H`` is the input height.
	    The sheet is a 3x4 grid of face-sized cells: front, right, back and left
	    fill the first row; top and bottom fill the first two cells of the second
	    row. All other cells stay black.

	    Args:
	        equirect: ``(H, W, 3)`` image array in equirectangular projection.

	    Returns:
	        A ``(3 * face, 4 * face, 3)`` ``uint8`` array. Pixels whose source
	        coordinate falls outside the input stay zero.
	    """
	    height, width = equirect.shape[:2]
	    face_size = height // 2
	    cubemap = np.zeros((face_size * 3, face_size * 4, 3), dtype=np.uint8)
	    if face_size == 0:
	        return cubemap

	    # Rotation vectors in degrees, one per face.
	    rotations = [
	        (0, 0, 0),    # front
	        (0, 90, 0),   # right
	        (0, 180, 0),  # back
	        (0, 270, 0),  # left
	        (-90, 0, 0),  # top
	        (90, 0, 0),   # bottom
	    ]

	    # One ray per face pixel on the z = 1 plane, x and y in [-1, 1).
	    # meshgrid's default 'xy' indexing makes the arrays indexed as [v, u].
	    coords = 2 * np.arange(face_size) / face_size - 1
	    x_3d, y_3d = np.meshgrid(coords, coords)
	    rays = np.stack((x_3d, y_3d, np.ones_like(x_3d)), axis=-1)

	    for i, rotation in enumerate(rotations):
	        x = (i % 4) * face_size
	        y = (i // 4) * face_size

	        R = _rotation_matrix(np.radians(np.array(rotation, dtype=float)))
	        points = rays @ R.T  # equivalent to R.dot(ray) for every pixel

	        # Longitude/latitude of each rotated ray.
	        theta = np.arctan2(points[..., 0], points[..., 2])
	        phi = np.arctan2(
	            points[..., 1],
	            np.sqrt(points[..., 0] ** 2 + points[..., 2] ** 2),
	        )

	        # Both operands are non-negative, so the cast truncates like int().
	        u_equi = ((theta + np.pi) * width / (2 * np.pi)).astype(int)
	        v_equi = ((phi + np.pi / 2) * height / np.pi).astype(int)

	        valid = (
	            (u_equi >= 0) & (u_equi < width)
	            & (v_equi >= 0) & (v_equi < height)
	        )
	        # ``face`` is a view, so the masked assignment writes into cubemap.
	        face = cubemap[y:y + face_size, x:x + face_size]
	        face[valid] = equirect[v_equi[valid], u_equi[valid]]

	    return cubemap


	def _rotation_matrix(rvec):
	    """Return the 3x3 rotation matrix for an axis-angle vector in radians."""
	    angle = float(np.linalg.norm(rvec))
	    if angle == 0.0:
	        return np.eye(3)
	    kx, ky, kz = rvec / angle
	    # Rodrigues' formula: R = I + sin(a) K + (1 - cos(a)) K^2.
	    K = np.array([
	        [0.0, -kz, ky],
	        [kz, 0.0, -kx],
	        [-ky, kx, 0.0],
	    ])
	    return np.eye(3) + np.sin(angle) * K + (1 - np.cos(angle)) * (K @ K)

	def process_video(video):
	    """Gradio callback: turn an uploaded video into a 360 photo and a cubemap.

	    Steps: extract frames, stitch them into an equirectangular image, derive
	    a cubemap, write both as JPEGs in the working directory, and add GPano
	    XMP metadata to the equirectangular file.

	    Args:
	        video: Path of the uploaded video, or ``None`` if nothing was uploaded.

	    Returns:
	        ``(equirect_path, cubemap_path, status_message)``. Both paths are
	        ``None`` when processing fails; the message then describes the error.
	        This function never raises.
	    """
	    try:
	        if video is None:
	            return None, None, "Please upload a video file."

	        video_path = video
	        if not os.path.exists(video_path):
	            return None, None, "Error: Video file not found."

	        # Log the working directory and file permission
	        logger.info(f"Working directory: {os.getcwd()}")
	        logger.info(f"Video path exists: {os.path.exists(video_path)}")
	        logger.info(f"Video path permissions: {oct(os.stat(video_path).st_mode)[-3:]}")

	        frames = extract_frames(video_path, num_frames=24)
	        if not frames:
	            return None, None, "Error: No frames could be extracted from the video."

	        equirect = create_360_panorama(frames)
	        logger.info("Created equirectangular panorama")

	        cubemap = equirect_to_cubemap(equirect)
	        logger.info("Created cubemap")

	        # Microseconds keep requests that land in the same second from
	        # overwriting each other's output files.
	        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
	        equirect_path = f"360_photo_{timestamp}.jpg"
	        cubemap_path = f"cubemap_{timestamp}.jpg"

	        logger.info("Saving equirectangular image...")
	        # cv2.imwrite reports failure through its return value, not an exception.
	        if not cv2.imwrite(equirect_path, equirect):
	            raise IOError(f"Could not write {equirect_path}")

	        # Rewrite the file in place with the XMP segment added.
	        height, width = equirect.shape[:2]
	        write_xmp_to_jpg(equirect_path, equirect_path, width, height)
	        logger.info("Added 360 metadata to equirectangular image")

	        logger.info("Saving cubemap...")
	        if not cv2.imwrite(cubemap_path, cubemap):
	            raise IOError(f"Could not write {cubemap_path}")

	        return equirect_path, cubemap_path, "Processing completed successfully!"

	    except Exception as e:
	        # Top-level boundary for the UI: log with traceback, report as text.
	        logger.exception(f"Error in process_video: {str(e)}")
	        return None, None, f"Error during processing: {str(e)}"

	# Create Gradio interface
	# process_video returns (equirect_path, cubemap_path, status_message), which
	# maps one-to-one onto the three output components listed below.
	iface = gr.Interface(
	fn=process_video,
	inputs=gr.Video(label="Upload 360° Video"),
	outputs=[
	gr.Image(label="360° Photo (with metadata)"),
	gr.Image(label="Cubemap View"),
	gr.Textbox(label="Status")
	],
	title="360° Video to Photo Converter",
	description="""
	Upload a 360° panoramic video (shot with iPhone wide-angle lens) to convert it into:
	1. 360° Photo with proper metadata (can be viewed in Google Photos, Facebook, etc.)
	2. Cubemap view

	Tips for best results:
	- Keep video length under 30 seconds
	- Ensure steady camera motion
	- Video should complete a full 360° rotation
	- Maintain consistent camera height
	- Good lighting conditions help with stitching
	""",
	# NOTE(review): presumably disables Gradio's flagging button; confirm
	# against the installed Gradio version.
	flagging_mode="never"
	)

	# Launch with queue
	# Only runs when the file is executed as a script: enables the request queue
	# and serves on all network interfaces at port 7860.
	if __name__ == "__main__":
	iface.queue().launch(
	server_name="0.0.0.0",
	server_port=7860
	)