# FurnitureDemo / app.py
import secrets
from pathlib import Path
from typing import cast
import gradio as gr
import numpy as np
import spaces
import torch
from diffusers import FluxFillPipeline
from gradio.components.image_editor import EditorValue
from PIL import Image, ImageFilter, ImageOps
DEVICE = "cuda"
EXAMPLES_DIR = Path(__file__).parent / "examples"
MAX_SEED = np.iinfo(np.int32).max
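# Caption template describing the two-panel (product shot | room scene) layout the fine-tuned Fill model expects.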
SYSTEM_PROMPT = r"""This two-panel split-frame image showcases a furniture in as a product shot versus styled in a room.
[LEFT] standalone product shot image the furniture on a white background.
[RIGHT] integrated example within a room scene."""
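# Extra context (in pixels) added around the drawn mask's bounding box; kept a multiple of 16 to stay VAE-friendly.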
MASK_CONTEXT_PADDING = 16 * 8
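# Without a GPU, fall back to a no-op pipeline that simply echoes the input image (handy for local UI testing).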
if not torch.cuda.is_available():
def _dummy_pipe(image: Image.Image, *args, **kwargs): # noqa: ARG001
return {"images": [image]}
pipe = _dummy_pipe
else:
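    # Load the FLUX.1 Fill base pipeline and merge the furniture LoRA weights into its transformer.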
state_dict, network_alphas = FluxFillPipeline.lora_state_dict(
pretrained_model_name_or_path_or_dict="blanchon/FluxFillFurniture",
weight_name="pytorch_lora_weights3.safetensors",
return_alphas=True,
)
if not all(("lora" in key or "dora_scale" in key) for key in state_dict):
msg = "Invalid LoRA checkpoint."
raise ValueError(msg)
pipe = FluxFillPipeline.from_pretrained(
"black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16
).to(DEVICE)
FluxFillPipeline.load_lora_into_transformer(
state_dict=state_dict,
network_alphas=network_alphas,
transformer=pipe.transformer,
)
pipe.to(DEVICE)
def make_example(image_path: Path, mask_path: Path) -> EditorValue:
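    """Build an ImageEditor value (background, mask layer, composite) from an example image/mask pair."""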
background_image = Image.open(image_path)
background_image = background_image.convert("RGB")
background = np.array(background_image)
mask_image = Image.open(mask_path)
mask_image = mask_image.convert("RGB")
mask = np.array(mask_image)
mask = mask[:, :, 0]
mask = np.where(mask == 255, 0, 255) # noqa: PLR2004
if background.shape[0] != mask.shape[0] or background.shape[1] != mask.shape[1]:
msg = "Background and mask must have the same shape"
raise ValueError(msg)
layer = np.zeros((background.shape[0], background.shape[1], 4), dtype=np.uint8)
layer[:, :, 3] = mask
composite = np.zeros((background.shape[0], background.shape[1], 4), dtype=np.uint8)
composite[:, :, :3] = background
composite[:, :, 3] = np.where(mask == 255, 0, 255) # noqa: PLR2004
return {
"background": background,
"layers": [layer],
"composite": composite,
}
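# Crop the centered original_size region back out of a padded square image.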
def remove_padding(image, original_size):
original_width, original_height = original_size
left = max((image.width - original_width) // 2, 0)
top = max((image.height - original_height) // 2, 0)
right = left + original_width
bottom = top + original_height
return image.crop((left, top, right, bottom))
def adjust_bbox_to_divisible_16(
x_min, y_min, x_max, y_max, width, height, padding=MASK_CONTEXT_PADDING
):
# Add padding
x_min = max(x_min - padding, 0)
y_min = max(y_min - padding, 0)
x_max = min(x_max + padding, width)
y_max = min(y_max + padding, height)
# Calculate current bbox width and height
bbox_width = x_max - x_min
bbox_height = y_max - y_min
    # Ensure bbox dimensions are divisible by 16, splitting the extra pixels
    # across both sides (the far side absorbs the odd pixel). Clamping at the
    # image border can still break divisibility for masks touching the edge.
    if bbox_width % 16 != 0:
        adjustment = 16 - (bbox_width % 16)
        x_min = max(x_min - adjustment // 2, 0)
        x_max = min(x_max + (adjustment - adjustment // 2), width)
    if bbox_height % 16 != 0:
        adjustment = 16 - (bbox_height % 16)
        y_min = max(y_min - adjustment // 2, 0)
        y_max = min(y_max + (adjustment - adjustment // 2), height)
# Ensure bbox is still within bounds
x_min = max(x_min, 0)
y_min = max(y_min, 0)
x_max = min(x_max, width)
y_max = min(y_max, height)
return x_min, y_min, x_max, y_max
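# Crop the masked room region, compose a (furniture | room) two-panel canvas, inpaint the
# right panel with FLUX.1 Fill, then paste the result back into the full-resolution room image.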
@spaces.GPU(duration=150)
def infer(
furniture_image_input: Image.Image,
room_image_input: EditorValue,
furniture_prompt: str = "",
seed: int = 42,
randomize_seed: bool = False,
guidance_scale: float = 3.5,
num_inference_steps: int = 20,
max_dimension: int = 720,
num_images_per_prompt: int = 2,
progress: gr.Progress = gr.Progress(track_tqdm=True), # noqa: ARG001, B008
):
# Ensure max_dimension is a multiple of 16 (for VAE)
max_dimension = (max_dimension // 16) * 16
room_image = room_image_input["background"]
if room_image is None:
msg = "Room image is required"
raise ValueError(msg)
room_image = cast("Image.Image", room_image)
room_mask = room_image_input["layers"][0]
if room_mask is None:
msg = "Room mask is required"
raise ValueError(msg)
room_mask = cast("Image.Image", room_mask)
    mask_bbox = room_mask.getbbox(alpha_only=False)
    if mask_bbox is None:
        msg = "Room mask is empty, please draw the area to furnish"
        raise ValueError(msg)
    mask_bbox_x_min, mask_bbox_y_min, mask_bbox_x_max, mask_bbox_y_max = mask_bbox
mask_bbox_x_min, mask_bbox_y_min, mask_bbox_x_max, mask_bbox_y_max = (
adjust_bbox_to_divisible_16(
mask_bbox_x_min,
mask_bbox_y_min,
mask_bbox_x_max,
mask_bbox_y_max,
room_mask.width,
room_mask.height,
padding=MASK_CONTEXT_PADDING,
)
)
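    # Pad the crop to a square of the bbox's longest side so scaling to the model resolution keeps the aspect ratio.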
bbox_longest_side = max(
mask_bbox_x_max - mask_bbox_x_min,
mask_bbox_y_max - mask_bbox_y_min,
)
room_image_cropped = room_image.crop((
mask_bbox_x_min,
mask_bbox_y_min,
mask_bbox_x_max,
mask_bbox_y_max,
))
room_image_cropped = ImageOps.pad(
room_image_cropped,
(bbox_longest_side, bbox_longest_side),
# White padding
color=(255, 255, 255),
centering=(0.5, 0.5),
)
room_image_cropped = ImageOps.fit(
room_image_cropped,
(max_dimension, max_dimension),
method=Image.Resampling.BICUBIC,
centering=(0.5, 0.5),
)
room_mask_cropped = room_mask.crop((
mask_bbox_x_min,
mask_bbox_y_min,
mask_bbox_x_max,
mask_bbox_y_max,
))
room_mask_cropped.save("room_mask_croppedv1.png")
room_mask_cropped = ImageOps.pad(
room_mask_cropped,
(max_dimension, max_dimension),
# White padding
color=(255, 255, 255),
centering=(0.5, 0.5),
)
room_mask_cropped = ImageOps.fit(
room_mask_cropped,
(max_dimension, max_dimension),
method=Image.Resampling.BICUBIC,
centering=(0.5, 0.5),
)
room_image_cropped.save("room_image_cropped.png")
room_mask_cropped.save("room_mask_cropped.png")
furniture_image = ImageOps.pad(
furniture_image_input,
(max_dimension, max_dimension),
# White padding
color=(255, 255, 255),
centering=(0.5, 0.5),
)
furniture_mask = Image.new("RGB", (max_dimension, max_dimension), (255, 255, 255))
image = Image.new(
"RGB",
(max_dimension * 2, max_dimension),
(255, 255, 255),
)
    # Left panel: furniture product shot; right panel: cropped room scene.
image.paste(furniture_image, (0, 0))
image.paste(room_image_cropped, (max_dimension, 0))
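    # Build the matching mask canvas for the two panels.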
mask = Image.new(
"RGB",
(max_dimension * 2, max_dimension),
(255, 255, 255),
)
mask.paste(furniture_mask, (0, 0))
mask.paste(room_mask_cropped, (max_dimension, 0), room_mask_cropped)
# Invert the mask
mask = ImageOps.invert(mask)
# Blur the mask
mask = mask.filter(ImageFilter.GaussianBlur(radius=10))
    # Convert to a single-channel grayscale mask
mask = mask.convert("L")
if randomize_seed:
seed = secrets.randbelow(MAX_SEED)
prompt = (
furniture_prompt + ".\n" + SYSTEM_PROMPT if furniture_prompt else SYSTEM_PROMPT
)
image.save("image.png")
mask.save("mask.png")
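    # Inpaint the composed canvas; the Fill pipeline regenerates the masked (right) panel.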
results_images = pipe(
prompt=prompt,
image=image,
mask_image=mask,
height=max_dimension,
width=max_dimension * 2,
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale,
num_images_per_prompt=num_images_per_prompt,
generator=torch.Generator("cpu").manual_seed(seed),
)["images"]
final_images = []
    for result_image in results_images:
final_image = room_image.copy()
# Downscale back to the bbox_longest_side
        image_generated = result_image.crop((
max_dimension,
0,
max_dimension * 2,
max_dimension,
))
image_generated = image_generated.resize(
(bbox_longest_side, bbox_longest_side), Image.Resampling.BICUBIC
)
# Crop back to the bbox (remove the padding)
image_generated = remove_padding(
image_generated,
(
mask_bbox_x_max - mask_bbox_x_min,
mask_bbox_y_max - mask_bbox_y_min,
),
)
        # Paste the generated region back at its original bbox location in the room image
final_image.paste(image_generated, (mask_bbox_x_min, mask_bbox_y_min))
final_images.append(final_image)
return final_images, seed
intro_markdown = r"""
<div>
<div>
<div style="display: flex; justify-content: center; align-items: center; text-align: center; font-size: 40px;">
<b>AnyFurnish</b>
</div>
<br>
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
<a href="https://github.com/julien-blanchon/"><img src="https://img.shields.io/static/v1?label=Github Report&message=Github&color=green"></a> &ensp;
</div>
<br>
<div style="display: flex; text-align: center; font-size: 14px; padding-right: 300px; padding-left: 300px;">
AnyFurnish is a tool that allows you to generate furniture images using Flux.1 Fill Dev.
You can upload a furniture image and a room image, and the tool will generate a new image with the furniture in the room.
</div>
</div>
</div>
"""
css = r"""
#col-left {
margin: 0 auto;
max-width: 430px;
}
#col-mid {
margin: 0 auto;
max-width: 430px;
}
#col-right {
margin: 0 auto;
max-width: 430px;
}
#col-showcase {
margin: 0 auto;
max-width: 1100px;
}
"""
with gr.Blocks(css=css) as demo:
gr.Markdown(intro_markdown)
with gr.Row():
with gr.Column(elem_id="col-left"):
gr.HTML(
"""
<div style="display: flex; justify-content: center; align-items: center; text-align: center; font-size: 20px;">
<div>
Step 1. Upload a furniture image ⬇️
</div>
</div>
""",
max_height=50,
)
furniture_image_input = gr.Image(
label="Furniture Image",
type="pil",
sources=["upload"],
image_mode="RGB",
height=500,
)
furniture_examples = gr.Examples(
examples=[
EXAMPLES_DIR / "1" / "furniture_image.png",
EXAMPLES_DIR / "2" / "furniture_image.png",
],
examples_per_page=12,
inputs=[furniture_image_input],
)
with gr.Column(elem_id="col-mid"):
gr.HTML(
"""
<div style="display: flex; justify-content: center; align-items: center; text-align: center; font-size: 20px;">
<div>
Step 2. Upload a room image ⬇️
</div>
</div>
""",
max_height=50,
)
room_image_input = gr.ImageEditor(
label="Room Image - Draw mask for inpainting",
type="pil",
sources=["upload"],
image_mode="RGBA",
layers=False,
brush=gr.Brush(colors=["#FFFFFF"], color_mode="fixed"),
height=500,
)
room_examples = gr.Examples(
examples=[
make_example(
EXAMPLES_DIR / "1" / "room_image.png",
EXAMPLES_DIR / "1" / "room_mask.png",
),
make_example(
EXAMPLES_DIR / "2" / "room_image.png",
EXAMPLES_DIR / "2" / "room_mask.png",
),
],
inputs=[room_image_input],
)
with gr.Column(elem_id="col-right"):
gr.HTML(
"""
<div style="display: flex; justify-content: center; align-items: center; text-align: center; font-size: 20px;">
<div>
Step 3. Press Run to launch
</div>
</div>
""",
max_height=50,
)
results = gr.Gallery(
label="Results",
format="png",
show_label=False,
columns=2,
height=500,
)
run_button = gr.Button("Run")
# Reset the results when the run button is clicked
run_button.click(
outputs=results,
fn=lambda: None,
)
with gr.Accordion("Advanced Settings", open=False):
seed = gr.Slider(
label="Seed",
minimum=0,
maximum=MAX_SEED,
step=1,
value=0,
)
randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
furniture_prompt = gr.Text(
label="Prompt",
max_lines=1,
placeholder="Enter a custom furniture description (optional)",
container=False,
)
with gr.Column():
max_dimension = gr.Slider(
label="Max Dimension",
minimum=512,
maximum=1024,
step=128,
value=720,
)
num_images_per_prompt = gr.Slider(
label="Number of images per prompt",
minimum=1,
maximum=4,
step=1,
value=2,
)
guidance_scale = gr.Slider(
label="Guidance Scale",
minimum=1,
maximum=30,
step=0.5,
# value=50, # noqa: ERA001
value=30,
)
num_inference_steps = gr.Slider(
label="Number of inference steps",
minimum=1,
maximum=50,
step=1,
value=20,
)
with gr.Column(elem_id="col-showcase"):
gr.HTML("""
<div style="display: flex; justify-content: center; align-items: center; text-align: center; font-size: 20px;">
<div> </div>
<br>
<div>
AnyFurnish examples in pairs of furniture and room images
</div>
</div>
""")
show_case = gr.Examples(
examples=[
[
EXAMPLES_DIR / "1" / "furniture_image.png",
make_example(
EXAMPLES_DIR / "1" / "room_image.png",
EXAMPLES_DIR / "1" / "room_mask.png",
),
],
[
EXAMPLES_DIR / "2" / "furniture_image.png",
make_example(
EXAMPLES_DIR / "2" / "room_image.png",
EXAMPLES_DIR / "2" / "room_mask.png",
),
],
],
inputs=[furniture_image_input, room_image_input],
label=None,
)
gr.on(
triggers=[run_button.click],
fn=infer,
inputs=[
furniture_image_input,
room_image_input,
furniture_prompt,
seed,
randomize_seed,
guidance_scale,
num_inference_steps,
max_dimension,
num_images_per_prompt,
],
outputs=[results, seed],
)
demo.launch()