elismasilva committed on
Commit
e53bd2f
·
1 Parent(s): a5c3b38

feat: added the `_get_crops_coords_list` function, which automatically defines the `(ctop, cleft)` crop coordinates used to focus the image generation of each tile. This helps to better harmonize the image and corrects the problem of flattened elements.

Browse files
Files changed (2) hide show
  1. app.py +5 -4
  2. mixture_tiling_sdxl.py +58 -9
app.py CHANGED
@@ -32,7 +32,7 @@ pipe = StableDiffusionXLTilingPipeline.from_pretrained(
32
  #variant="fp16",
33
  ).to("cuda")
34
 
35
- #pipe.enable_model_cpu_offload() #<< Enable this if you have limited VRAM
36
  pipe.enable_vae_tiling()
37
  pipe.enable_vae_slicing()
38
 
@@ -50,6 +50,8 @@ def select_scheduler(scheduler_name):
50
  scheduler = scheduler.from_config(pipe.scheduler.config, **add_kwargs)
51
  return scheduler
52
 
 
 
53
  @spaces.GPU
54
  def predict(left_prompt, center_prompt, right_prompt, negative_prompt, left_gs, center_gs, right_gs, overlap_pixels, steps, generation_seed, scheduler, tile_height, tile_width, target_height, target_width):
55
  global pipe
@@ -80,10 +82,9 @@ def predict(left_prompt, center_prompt, right_prompt, negative_prompt, left_gs,
80
  tile_width=tile_width,
81
  tile_row_overlap=0,
82
  tile_col_overlap=overlap_pixels,
83
- guidance_scale_tiles=[[left_gs, center_gs, right_gs]],
84
  height=target_height,
85
- width=target_width,
86
- target_size=(target_height, target_width),
87
  generator=generator,
88
  num_inference_steps=steps,
89
  )["images"][0]
 
32
  #variant="fp16",
33
  ).to("cuda")
34
 
35
+ pipe.enable_model_cpu_offload() #<< Enable this if you have limited VRAM
36
  pipe.enable_vae_tiling()
37
  pipe.enable_vae_slicing()
38
 
 
50
  scheduler = scheduler.from_config(pipe.scheduler.config, **add_kwargs)
51
  return scheduler
52
 
53
+
54
+
55
  @spaces.GPU
56
  def predict(left_prompt, center_prompt, right_prompt, negative_prompt, left_gs, center_gs, right_gs, overlap_pixels, steps, generation_seed, scheduler, tile_height, tile_width, target_height, target_width):
57
  global pipe
 
82
  tile_width=tile_width,
83
  tile_row_overlap=0,
84
  tile_col_overlap=overlap_pixels,
85
+ guidance_scale_tiles=[[left_gs, center_gs, right_gs]],
86
  height=target_height,
87
+ width=target_width,
 
88
  generator=generator,
89
  num_inference_steps=steps,
90
  )["images"][0]
mixture_tiling_sdxl.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
2
  #
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
  # you may not use this file except in compliance with the License.
@@ -150,6 +150,49 @@ def _tile2latent_exclusive_indices(
150
  # return row_init, row_end, col_init, col_end
151
  return row_segment[0], row_segment[1], col_segment[0], col_segment[1]
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
155
  def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
@@ -757,10 +800,10 @@ class StableDiffusionXLTilingPipeline(
757
  return_dict: bool = True,
758
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
759
  original_size: Optional[Tuple[int, int]] = None,
760
- crops_coords_top_left: Tuple[int, int] = (0, 0),
761
  target_size: Optional[Tuple[int, int]] = None,
762
  negative_original_size: Optional[Tuple[int, int]] = None,
763
- negative_crops_coords_top_left: Tuple[int, int] = (0, 0),
764
  negative_target_size: Optional[Tuple[int, int]] = None,
765
  clip_skip: Optional[int] = None,
766
  tile_height: Optional[int] = 1024,
@@ -826,7 +869,7 @@ class StableDiffusionXLTilingPipeline(
826
  `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
827
  explained in section 2.2 of
828
  [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
829
- crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
830
  `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
831
  `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
832
  `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
@@ -840,7 +883,7 @@ class StableDiffusionXLTilingPipeline(
840
  micro-conditioning as explained in section 2.2 of
841
  [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
842
  information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
843
- negative_crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
844
  To negatively condition the generation process based on a specific crop coordinates. Part of SDXL's
845
  micro-conditioning as explained in section 2.2 of
846
  [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
@@ -883,6 +926,8 @@ class StableDiffusionXLTilingPipeline(
883
 
884
  original_size = original_size or (height, width)
885
  target_size = target_size or (height, width)
 
 
886
 
887
  self._guidance_scale = guidance_scale
888
  self._clip_skip = clip_skip
@@ -890,8 +935,7 @@ class StableDiffusionXLTilingPipeline(
890
  self._interrupt = False
891
 
892
  grid_rows = len(prompt)
893
- grid_cols = len(prompt[0])
894
-
895
  tiles_mode = [mode.value for mode in self.SeedTilesMode]
896
 
897
  if isinstance(seed_tiles_mode, str):
@@ -913,6 +957,11 @@ class StableDiffusionXLTilingPipeline(
913
  batch_size = 1
914
 
915
  device = self._execution_device
 
 
 
 
 
916
 
917
  # update height and width tile size and tile overlap size
918
  height = tile_height + (grid_rows - 1) * (tile_height - tile_row_overlap)
@@ -1020,7 +1069,7 @@ class StableDiffusionXLTilingPipeline(
1020
  text_encoder_projection_dim = self.text_encoder_2.config.projection_dim
1021
  add_time_ids = self._get_add_time_ids(
1022
  original_size,
1023
- crops_coords_top_left,
1024
  target_size,
1025
  dtype=prompt_embeds.dtype,
1026
  text_encoder_projection_dim=text_encoder_projection_dim,
@@ -1028,7 +1077,7 @@ class StableDiffusionXLTilingPipeline(
1028
  if negative_original_size is not None and negative_target_size is not None:
1029
  negative_add_time_ids = self._get_add_time_ids(
1030
  negative_original_size,
1031
- negative_crops_coords_top_left,
1032
  negative_target_size,
1033
  dtype=prompt_embeds.dtype,
1034
  text_encoder_projection_dim=text_encoder_projection_dim,
 
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
  #
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
  # you may not use this file except in compliance with the License.
 
150
  # return row_init, row_end, col_init, col_end
151
  return row_segment[0], row_segment[1], col_segment[0], col_segment[1]
152
 
153
+ def _get_crops_coords_list(num_rows, num_cols, output_width):
154
+ """
155
+ Generates a list of lists of `crops_coords_top_left` tuples for focusing on
156
+ different horizontal parts of an image, and repeats this list for the specified
157
+ number of rows in the output structure.
158
+
159
+ This function calculates `crops_coords_top_left` tuples to create horizontal
160
+ focus variations (like left, center, right focus) based on `output_width`
161
+ and `num_cols` (which represents the number of horizontal focus points/columns).
162
+ It then repeats the *list* of these horizontal focus tuples `num_rows` times to
163
+ create the final list of lists output structure.
164
+
165
+ Args:
166
+ num_rows (int): The desired number of rows in the output list of lists.
167
+ This determines how many times the list of horizontal
168
+ focus variations will be repeated.
169
+ num_cols (int): The number of horizontal focus points (columns) to generate.
170
+ This determines how many horizontal focus variations are
171
+ created based on dividing the `output_width`.
172
+ output_width (int): The desired width of the output image.
173
+
174
+ Returns:
175
+ list[list[tuple[int, int]]]: A list of lists of tuples. Each inner list
176
+ contains `num_cols` tuples of `(ctop, cleft)`,
177
+ representing horizontal focus points. The outer list
178
+ contains `num_rows` such inner lists.
179
+ """
180
+ crops_coords_list = []
181
+ if num_cols <= 0:
182
+ crops_coords_list = []
183
+ elif num_cols == 1:
184
+ crops_coords_list = [(0, 0)]
185
+ else:
186
+ section_width = output_width / num_cols
187
+ for i in range(num_cols):
188
+ cleft = int(round(i * section_width))
189
+ crops_coords_list.append((0, cleft))
190
+
191
+ result_list = []
192
+ for _ in range(num_rows):
193
+ result_list.append(list(crops_coords_list))
194
+
195
+ return result_list
196
 
197
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
198
  def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
 
800
  return_dict: bool = True,
801
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
802
  original_size: Optional[Tuple[int, int]] = None,
803
+ crops_coords_top_left: Optional[List[List[Tuple[int, int]]]] = None,
804
  target_size: Optional[Tuple[int, int]] = None,
805
  negative_original_size: Optional[Tuple[int, int]] = None,
806
+ negative_crops_coords_top_left: Optional[List[List[Tuple[int, int]]]] = None,
807
  negative_target_size: Optional[Tuple[int, int]] = None,
808
  clip_skip: Optional[int] = None,
809
  tile_height: Optional[int] = 1024,
 
869
  `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
870
  explained in section 2.2 of
871
  [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
872
+ crops_coords_top_left (`List[List[Tuple[int, int]]]`, *optional*, defaults to `None`):
873
  `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
874
  `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
875
  `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
 
883
  micro-conditioning as explained in section 2.2 of
884
  [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
885
  information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
886
+ negative_crops_coords_top_left (`List[List[Tuple[int, int]]]`, *optional*, defaults to `None`):
887
  To negatively condition the generation process based on a specific crop coordinates. Part of SDXL's
888
  micro-conditioning as explained in section 2.2 of
889
  [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
 
926
 
927
  original_size = original_size or (height, width)
928
  target_size = target_size or (height, width)
929
+ negative_original_size = negative_original_size or (height, width)
930
+ negative_target_size = negative_target_size or (height, width)
931
 
932
  self._guidance_scale = guidance_scale
933
  self._clip_skip = clip_skip
 
935
  self._interrupt = False
936
 
937
  grid_rows = len(prompt)
938
+ grid_cols = len(prompt[0])
 
939
  tiles_mode = [mode.value for mode in self.SeedTilesMode]
940
 
941
  if isinstance(seed_tiles_mode, str):
 
957
  batch_size = 1
958
 
959
  device = self._execution_device
960
+
961
+ # update crops coords list
962
+ crops_coords_top_left = _get_crops_coords_list(grid_rows, grid_cols, tile_width)
963
+ if negative_original_size is not None and negative_target_size is not None:
964
+ negative_crops_coords_top_left = _get_crops_coords_list(grid_rows, grid_cols, tile_width)
965
 
966
  # update height and width tile size and tile overlap size
967
  height = tile_height + (grid_rows - 1) * (tile_height - tile_row_overlap)
 
1069
  text_encoder_projection_dim = self.text_encoder_2.config.projection_dim
1070
  add_time_ids = self._get_add_time_ids(
1071
  original_size,
1072
+ crops_coords_top_left[row][col],
1073
  target_size,
1074
  dtype=prompt_embeds.dtype,
1075
  text_encoder_projection_dim=text_encoder_projection_dim,
 
1077
  if negative_original_size is not None and negative_target_size is not None:
1078
  negative_add_time_ids = self._get_add_time_ids(
1079
  negative_original_size,
1080
+ negative_crops_coords_top_left[row][col],
1081
  negative_target_size,
1082
  dtype=prompt_embeds.dtype,
1083
  text_encoder_projection_dim=text_encoder_projection_dim,