aiqcamp committed on
Commit
5f63bf7
·
verified ·
1 Parent(s): a955f9f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -42
app.py CHANGED
@@ -43,31 +43,8 @@ pipe = StableDiffusionXLFillPipeline.from_pretrained(
43
 
44
  pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
45
 
46
- # Make sure all text encoder components use the same dtype
47
- pipe.text_encoder = pipe.text_encoder.to(dtype=torch.float16)
48
- pipe.text_encoder_2 = pipe.text_encoder_2.to(dtype=torch.float16)
49
-
50
- # Patch the text encoder forward methods to ensure consistent dtype
51
- def patch_text_encoder_forward(encoder):
52
- original_forward = encoder.forward
53
-
54
- def patched_forward(*args, **kwargs):
55
- # Convert input tensors to float16
56
- if len(args) > 0 and isinstance(args[0], torch.Tensor):
57
- args = list(args)
58
- args[0] = args[0].to(dtype=torch.float16)
59
-
60
- for key in kwargs:
61
- if isinstance(kwargs[key], torch.Tensor):
62
- kwargs[key] = kwargs[key].to(dtype=torch.float16)
63
-
64
- return original_forward(*args, **kwargs)
65
-
66
- encoder.forward = patched_forward
67
-
68
- # Apply the patch to both encoders
69
- patch_text_encoder_forward(pipe.text_encoder)
70
- patch_text_encoder_forward(pipe.text_encoder_2)
71
 
72
  def can_expand(source_width, source_height, target_width, target_height, alignment):
73
  """Checks if the image can be expanded based on the alignment."""
@@ -170,23 +147,19 @@ def infer(image, width, height, overlap_width, num_inference_steps, resize_optio
170
 
171
  final_prompt = f"{prompt_input} , high quality, 4k" if prompt_input else "high quality, 4k"
172
 
173
- with torch.cuda.amp.autocast(dtype=torch.float16):
174
- (
175
- prompt_embeds,
176
- negative_prompt_embeds,
177
- pooled_prompt_embeds,
178
- negative_pooled_prompt_embeds,
179
- ) = pipe.encode_prompt(final_prompt, "cuda", True)
180
-
181
- for image in pipe(
182
- prompt_embeds=prompt_embeds,
183
- negative_prompt_embeds=negative_prompt_embeds,
184
- pooled_prompt_embeds=pooled_prompt_embeds,
185
- negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
186
- image=cnet_image,
187
- num_inference_steps=num_inference_steps
188
- ):
189
- yield cnet_image, image
190
 
191
  image = image.convert("RGBA")
192
  cnet_image.paste(image, (0, 0), mask)
 
43
 
44
  pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
45
 
46
+ # The key differences are below - don't modify the text encoder directly
47
+ # We'll fix it in the pipeline code instead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  def can_expand(source_width, source_height, target_width, target_height, alignment):
50
  """Checks if the image can be expanded based on the alignment."""
 
147
 
148
  final_prompt = f"{prompt_input} , high quality, 4k" if prompt_input else "high quality, 4k"
149
 
150
+ # Important fix: Use the original pipeline's method without modifications
151
+ # Let it handle the dtype conversions internally
152
+ encoded_prompts = pipe.encode_prompt(final_prompt, "cuda", True)
153
+
154
+ for image in pipe(
155
+ prompt_embeds=encoded_prompts[0],
156
+ negative_prompt_embeds=encoded_prompts[1],
157
+ pooled_prompt_embeds=encoded_prompts[2],
158
+ negative_pooled_prompt_embeds=encoded_prompts[3],
159
+ image=cnet_image,
160
+ num_inference_steps=num_inference_steps
161
+ ):
162
+ yield cnet_image, image
 
 
 
 
163
 
164
  image = image.convert("RGBA")
165
  cnet_image.paste(image, (0, 0), mask)