Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
e53bd2f
1
Parent(s):
a5c3b38
feat: add the `_get_crops_coords_list` function to automatically define the (ctop, cleft) crop coordinates used to focus image generation; this helps to better harmonize the image and corrects the problem of flattened elements.
Browse files- app.py +5 -4
- mixture_tiling_sdxl.py +58 -9
app.py
CHANGED
@@ -32,7 +32,7 @@ pipe = StableDiffusionXLTilingPipeline.from_pretrained(
|
|
32 |
#variant="fp16",
|
33 |
).to("cuda")
|
34 |
|
35 |
-
|
36 |
pipe.enable_vae_tiling()
|
37 |
pipe.enable_vae_slicing()
|
38 |
|
@@ -50,6 +50,8 @@ def select_scheduler(scheduler_name):
|
|
50 |
scheduler = scheduler.from_config(pipe.scheduler.config, **add_kwargs)
|
51 |
return scheduler
|
52 |
|
|
|
|
|
53 |
@spaces.GPU
|
54 |
def predict(left_prompt, center_prompt, right_prompt, negative_prompt, left_gs, center_gs, right_gs, overlap_pixels, steps, generation_seed, scheduler, tile_height, tile_width, target_height, target_width):
|
55 |
global pipe
|
@@ -80,10 +82,9 @@ def predict(left_prompt, center_prompt, right_prompt, negative_prompt, left_gs,
|
|
80 |
tile_width=tile_width,
|
81 |
tile_row_overlap=0,
|
82 |
tile_col_overlap=overlap_pixels,
|
83 |
-
guidance_scale_tiles=[[left_gs, center_gs, right_gs]],
|
84 |
height=target_height,
|
85 |
-
width=target_width,
|
86 |
-
target_size=(target_height, target_width),
|
87 |
generator=generator,
|
88 |
num_inference_steps=steps,
|
89 |
)["images"][0]
|
|
|
32 |
#variant="fp16",
|
33 |
).to("cuda")
|
34 |
|
35 |
+
pipe.enable_model_cpu_offload() #<< Enable this if you have limited VRAM
|
36 |
pipe.enable_vae_tiling()
|
37 |
pipe.enable_vae_slicing()
|
38 |
|
|
|
50 |
scheduler = scheduler.from_config(pipe.scheduler.config, **add_kwargs)
|
51 |
return scheduler
|
52 |
|
53 |
+
|
54 |
+
|
55 |
@spaces.GPU
|
56 |
def predict(left_prompt, center_prompt, right_prompt, negative_prompt, left_gs, center_gs, right_gs, overlap_pixels, steps, generation_seed, scheduler, tile_height, tile_width, target_height, target_width):
|
57 |
global pipe
|
|
|
82 |
tile_width=tile_width,
|
83 |
tile_row_overlap=0,
|
84 |
tile_col_overlap=overlap_pixels,
|
85 |
+
guidance_scale_tiles=[[left_gs, center_gs, right_gs]],
|
86 |
height=target_height,
|
87 |
+
width=target_width,
|
|
|
88 |
generator=generator,
|
89 |
num_inference_steps=steps,
|
90 |
)["images"][0]
|
mixture_tiling_sdxl.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# Copyright
|
2 |
#
|
3 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
# you may not use this file except in compliance with the License.
|
@@ -150,6 +150,49 @@ def _tile2latent_exclusive_indices(
|
|
150 |
# return row_init, row_end, col_init, col_end
|
151 |
return row_segment[0], row_segment[1], col_segment[0], col_segment[1]
|
152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
|
155 |
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
@@ -757,10 +800,10 @@ class StableDiffusionXLTilingPipeline(
|
|
757 |
return_dict: bool = True,
|
758 |
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
759 |
original_size: Optional[Tuple[int, int]] = None,
|
760 |
-
crops_coords_top_left: Tuple[int, int] =
|
761 |
target_size: Optional[Tuple[int, int]] = None,
|
762 |
negative_original_size: Optional[Tuple[int, int]] = None,
|
763 |
-
negative_crops_coords_top_left: Tuple[int, int] =
|
764 |
negative_target_size: Optional[Tuple[int, int]] = None,
|
765 |
clip_skip: Optional[int] = None,
|
766 |
tile_height: Optional[int] = 1024,
|
@@ -826,7 +869,7 @@ class StableDiffusionXLTilingPipeline(
|
|
826 |
`original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
|
827 |
explained in section 2.2 of
|
828 |
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
|
829 |
-
crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
|
830 |
`crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
|
831 |
`crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
|
832 |
`crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
|
@@ -840,7 +883,7 @@ class StableDiffusionXLTilingPipeline(
|
|
840 |
micro-conditioning as explained in section 2.2 of
|
841 |
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
|
842 |
information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
|
843 |
-
negative_crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
|
844 |
To negatively condition the generation process based on a specific crop coordinates. Part of SDXL's
|
845 |
micro-conditioning as explained in section 2.2 of
|
846 |
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
|
@@ -883,6 +926,8 @@ class StableDiffusionXLTilingPipeline(
|
|
883 |
|
884 |
original_size = original_size or (height, width)
|
885 |
target_size = target_size or (height, width)
|
|
|
|
|
886 |
|
887 |
self._guidance_scale = guidance_scale
|
888 |
self._clip_skip = clip_skip
|
@@ -890,8 +935,7 @@ class StableDiffusionXLTilingPipeline(
|
|
890 |
self._interrupt = False
|
891 |
|
892 |
grid_rows = len(prompt)
|
893 |
-
grid_cols = len(prompt[0])
|
894 |
-
|
895 |
tiles_mode = [mode.value for mode in self.SeedTilesMode]
|
896 |
|
897 |
if isinstance(seed_tiles_mode, str):
|
@@ -913,6 +957,11 @@ class StableDiffusionXLTilingPipeline(
|
|
913 |
batch_size = 1
|
914 |
|
915 |
device = self._execution_device
|
|
|
|
|
|
|
|
|
|
|
916 |
|
917 |
# update height and width tile size and tile overlap size
|
918 |
height = tile_height + (grid_rows - 1) * (tile_height - tile_row_overlap)
|
@@ -1020,7 +1069,7 @@ class StableDiffusionXLTilingPipeline(
|
|
1020 |
text_encoder_projection_dim = self.text_encoder_2.config.projection_dim
|
1021 |
add_time_ids = self._get_add_time_ids(
|
1022 |
original_size,
|
1023 |
-
crops_coords_top_left,
|
1024 |
target_size,
|
1025 |
dtype=prompt_embeds.dtype,
|
1026 |
text_encoder_projection_dim=text_encoder_projection_dim,
|
@@ -1028,7 +1077,7 @@ class StableDiffusionXLTilingPipeline(
|
|
1028 |
if negative_original_size is not None and negative_target_size is not None:
|
1029 |
negative_add_time_ids = self._get_add_time_ids(
|
1030 |
negative_original_size,
|
1031 |
-
negative_crops_coords_top_left,
|
1032 |
negative_target_size,
|
1033 |
dtype=prompt_embeds.dtype,
|
1034 |
text_encoder_projection_dim=text_encoder_projection_dim,
|
|
|
1 |
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2 |
#
|
3 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
# you may not use this file except in compliance with the License.
|
|
|
150 |
# return row_init, row_end, col_init, col_end
|
151 |
return row_segment[0], row_segment[1], col_segment[0], col_segment[1]
|
152 |
|
153 |
+
def _get_crops_coords_list(num_rows, num_cols, output_width):
|
154 |
+
"""
|
155 |
+
Generates a list of lists of `crops_coords_top_left` tuples for focusing on
|
156 |
+
different horizontal parts of an image, and repeats this list for the specified
|
157 |
+
number of rows in the output structure.
|
158 |
+
|
159 |
+
This function calculates `crops_coords_top_left` tuples to create horizontal
|
160 |
+
focus variations (like left, center, right focus) based on `output_width`
|
161 |
+
and `num_cols` (which represents the number of horizontal focus points/columns).
|
162 |
+
It then repeats the *list* of these horizontal focus tuples `num_rows` times to
|
163 |
+
create the final list of lists output structure.
|
164 |
+
|
165 |
+
Args:
|
166 |
+
num_rows (int): The desired number of rows in the output list of lists.
|
167 |
+
This determines how many times the list of horizontal
|
168 |
+
focus variations will be repeated.
|
169 |
+
num_cols (int): The number of horizontal focus points (columns) to generate.
|
170 |
+
This determines how many horizontal focus variations are
|
171 |
+
created based on dividing the `output_width`.
|
172 |
+
output_width (int): The desired width of the output image.
|
173 |
+
|
174 |
+
Returns:
|
175 |
+
list[list[tuple[int, int]]]: A list of lists of tuples. Each inner list
|
176 |
+
contains `num_cols` tuples of `(ctop, cleft)`,
|
177 |
+
representing horizontal focus points. The outer list
|
178 |
+
contains `num_rows` such inner lists.
|
179 |
+
"""
|
180 |
+
crops_coords_list = []
|
181 |
+
if num_cols <= 0:
|
182 |
+
crops_coords_list = []
|
183 |
+
elif num_cols == 1:
|
184 |
+
crops_coords_list = [(0, 0)]
|
185 |
+
else:
|
186 |
+
section_width = output_width / num_cols
|
187 |
+
for i in range(num_cols):
|
188 |
+
cleft = int(round(i * section_width))
|
189 |
+
crops_coords_list.append((0, cleft))
|
190 |
+
|
191 |
+
result_list = []
|
192 |
+
for _ in range(num_rows):
|
193 |
+
result_list.append(list(crops_coords_list))
|
194 |
+
|
195 |
+
return result_list
|
196 |
|
197 |
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
|
198 |
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
|
800 |
return_dict: bool = True,
|
801 |
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
802 |
original_size: Optional[Tuple[int, int]] = None,
|
803 |
+
crops_coords_top_left: Optional[List[List[Tuple[int, int]]]] = None,
|
804 |
target_size: Optional[Tuple[int, int]] = None,
|
805 |
negative_original_size: Optional[Tuple[int, int]] = None,
|
806 |
+
negative_crops_coords_top_left: Optional[List[List[Tuple[int, int]]]] = None,
|
807 |
negative_target_size: Optional[Tuple[int, int]] = None,
|
808 |
clip_skip: Optional[int] = None,
|
809 |
tile_height: Optional[int] = 1024,
|
|
|
869 |
`original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
|
870 |
explained in section 2.2 of
|
871 |
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
|
872 |
+
crops_coords_top_left (`List[List[Tuple[int, int]]]`, *optional*, defaults to (0, 0)):
|
873 |
`crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
|
874 |
`crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
|
875 |
`crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
|
|
|
883 |
micro-conditioning as explained in section 2.2 of
|
884 |
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
|
885 |
information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
|
886 |
+
negative_crops_coords_top_left (`List[List[Tuple[int, int]]]`, *optional*, defaults to (0, 0)):
|
887 |
To negatively condition the generation process based on a specific crop coordinates. Part of SDXL's
|
888 |
micro-conditioning as explained in section 2.2 of
|
889 |
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
|
|
|
926 |
|
927 |
original_size = original_size or (height, width)
|
928 |
target_size = target_size or (height, width)
|
929 |
+
negative_original_size = negative_original_size or (height, width)
|
930 |
+
negative_target_size = negative_target_size or (height, width)
|
931 |
|
932 |
self._guidance_scale = guidance_scale
|
933 |
self._clip_skip = clip_skip
|
|
|
935 |
self._interrupt = False
|
936 |
|
937 |
grid_rows = len(prompt)
|
938 |
+
grid_cols = len(prompt[0])
|
|
|
939 |
tiles_mode = [mode.value for mode in self.SeedTilesMode]
|
940 |
|
941 |
if isinstance(seed_tiles_mode, str):
|
|
|
957 |
batch_size = 1
|
958 |
|
959 |
device = self._execution_device
|
960 |
+
|
961 |
+
# update crops coords list
|
962 |
+
crops_coords_top_left = _get_crops_coords_list(grid_rows, grid_cols, tile_width)
|
963 |
+
if negative_original_size is not None and negative_target_size is not None:
|
964 |
+
negative_crops_coords_top_left = _get_crops_coords_list(grid_rows, grid_cols, tile_width)
|
965 |
|
966 |
# update height and width tile size and tile overlap size
|
967 |
height = tile_height + (grid_rows - 1) * (tile_height - tile_row_overlap)
|
|
|
1069 |
text_encoder_projection_dim = self.text_encoder_2.config.projection_dim
|
1070 |
add_time_ids = self._get_add_time_ids(
|
1071 |
original_size,
|
1072 |
+
crops_coords_top_left[row][col],
|
1073 |
target_size,
|
1074 |
dtype=prompt_embeds.dtype,
|
1075 |
text_encoder_projection_dim=text_encoder_projection_dim,
|
|
|
1077 |
if negative_original_size is not None and negative_target_size is not None:
|
1078 |
negative_add_time_ids = self._get_add_time_ids(
|
1079 |
negative_original_size,
|
1080 |
+
negative_crops_coords_top_left[row][col],
|
1081 |
negative_target_size,
|
1082 |
dtype=prompt_embeds.dtype,
|
1083 |
text_encoder_projection_dim=text_encoder_projection_dim,
|