Aatricks commited on
Commit
1264e6e
·
verified ·
1 Parent(s): cfe609e

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -78,6 +78,9 @@ Here’s what makes LightDiffusion-Next stand out:
78
  - **Low-End Device Support**:
79
  Run LightDiffusion-Next on low-end devices with as little as 2GB of VRAM or even no GPU, ensuring accessibility for all users.
80
 
 
 
 
81
  ---
82
 
83
  ## ⚡ Performance Benchmarks
@@ -87,7 +90,7 @@ Here’s what makes LightDiffusion-Next stand out:
87
  | **Tool** | **Speed (it/s)** |
88
  |------------------------------------|------------------|
89
  | **LightDiffusion with Stable-Fast** | 2.8 |
90
- | **LightDiffusion** | 1.8 |
91
  | **ComfyUI** | 1.4 |
92
  | **SDForge** | 1.3 |
93
  | **SDWebUI** | 0.9 |
 
78
  - **Low-End Device Support**:
79
  Run LightDiffusion-Next on low-end devices with as little as 2GB of VRAM or even no GPU, ensuring accessibility for all users.
80
 
81
+ - **CFG++**:
82
+ Uses samplers modified to use CFG++ for better quality results compared to traditional methods.
83
+
84
  ---
85
 
86
  ## ⚡ Performance Benchmarks
 
90
  | **Tool** | **Speed (it/s)** |
91
  |------------------------------------|------------------|
92
  | **LightDiffusion with Stable-Fast** | 2.8 |
93
+ | **LightDiffusion** | 1.9 |
94
  | **ComfyUI** | 1.4 |
95
  | **SDForge** | 1.3 |
96
  | **SDWebUI** | 0.9 |
modules/sample/samplers.py CHANGED
@@ -142,6 +142,427 @@ def sample_euler(
142
  return x
143
 
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  def set_model_options_post_cfg_function(
146
  model_options, post_cfg_function, disable_cfg1_optimization=False
147
  ):
 
142
  return x
143
 
144
 
145
class Rescaler:
    """Context manager that temporarily resizes a model wrapper's
    ``latent_image`` / ``noise`` tensors (and any ``denoise_mask`` in the
    sampler extra args) to the spatial size of ``x``.

    The dynamic sampling step denoises a half-resolution view of the latent,
    so the conditioning tensors attached to the model must be interpolated
    down to the same size for the model call and restored afterwards.
    """

    def __init__(self, model, x, mode, **extra_args):
        self.model = model
        self.x = x
        self.mode = mode  # interpolation mode, e.g. "nearest-exact"
        self.extra_args = extra_args

        # Keep references to the originals so __exit__ can restore them.
        self.latent_image, self.noise = model.latent_image, model.noise
        self.denoise_mask = self.extra_args.get("denoise_mask", None)

    def __enter__(self):
        if self.latent_image is not None:
            self.model.latent_image = torch.nn.functional.interpolate(
                input=self.latent_image, size=self.x.shape[2:4], mode=self.mode
            )
        if self.noise is not None:
            # BUG FIX: the original interpolated ``self.latent_image`` here,
            # so ``model.noise`` was silently replaced by a resized copy of
            # the latent image. Interpolate the noise tensor itself.
            self.model.noise = torch.nn.functional.interpolate(
                input=self.noise, size=self.x.shape[2:4], mode=self.mode
            )
        if self.denoise_mask is not None:
            self.extra_args["denoise_mask"] = torch.nn.functional.interpolate(
                input=self.denoise_mask, size=self.x.shape[2:4], mode=self.mode
            )

        return self

    def __exit__(self, type, value, traceback):
        # Restore the original full-resolution tensors on the model.
        del self.model.latent_image, self.model.noise
        self.model.latent_image, self.model.noise = self.latent_image, self.noise
+
175
+
176
@torch.no_grad()
def dy_sampling_step_cfg_pp(
    x,
    model,
    sigma_next,
    i,
    sigma,
    sigma_hat,
    callback,
    current_cfg=7.5,
    cfg_x0_scale=1.0,
    **extra_args,
):
    """One dynamic refinement step at half resolution with CFG++ guidance.

    The latent ``x`` is split into 2x2 tiles; the bottom-right pixel of each
    tile is gathered into a half-resolution image, denoised once, advanced by
    an Euler step towards ``sigma_next``, and scattered back into ``x``.

    Args:
        x: Current latent, shape (B, C, H, W).
        model: Denoiser wrapper; must expose ``latent_image`` / ``noise``
            attributes (see ``Rescaler``).
        sigma_next: Sigma of the next step.
        i: Step index (forwarded to ``callback``).
        sigma: Current sigma (forwarded to ``callback``).
        sigma_hat: Churned sigma actually used for the model call.
        callback: Optional progress callback.
        current_cfg: CFG scale applied to this step's guidance.
        cfg_x0_scale: Accepted for interface compatibility; the "momentum"
            branch that used it was unreachable in the original (its trigger
            variable ``old_uncond_denoised`` was never assigned) and has been
            removed.
        **extra_args: Extra sampler args, forwarded to the model.

    Returns:
        The updated latent, same shape as ``x``.
    """
    # Capture the unconditional prediction via the CFG post-hook so the
    # guidance below can recombine cond/uncond explicitly.
    uncond_denoised = None

    def post_cfg_function(args):
        nonlocal uncond_denoised
        uncond_denoised = args["uncond_denoised"]
        return args["denoised"]

    model_options = extra_args.get("model_options", {}).copy()
    extra_args["model_options"] = set_model_options_post_cfg_function(
        model_options, post_cfg_function, disable_cfg1_optimization=True
    )

    # Downsample by gathering one pixel per 2x2 tile. Odd trailing rows /
    # columns are sliced off first and re-attached at the end.
    original_shape = x.shape
    batch_size, channels, m, n = (
        original_shape[0],
        original_shape[1],
        original_shape[2] // 2,
        original_shape[3] // 2,
    )
    extra_row = x.shape[2] % 2 == 1
    extra_col = x.shape[3] % 2 == 1

    if extra_row:
        extra_row_content = x[:, :, -1:, :]
        x = x[:, :, :-1, :]
    if extra_col:
        extra_col_content = x[:, :, :, -1:]
        x = x[:, :, :, :-1]

    a_list = (
        x.unfold(2, 2, 2)
        .unfold(3, 2, 2)
        .contiguous()
        .view(batch_size, channels, m * n, 2, 2)
    )
    c = a_list[:, :, :, 1, 1].view(batch_size, channels, m, n)

    # The model's conditioning tensors must match the half-res latent.
    with Rescaler(model, c, "nearest-exact", **extra_args) as rescaler:
        denoised = model(c, sigma_hat * c.new_ones([c.shape[0]]), **rescaler.extra_args)

    if callback is not None:
        callback(
            {
                "x": c,
                "i": i,
                "sigma": sigma,
                "sigma_hat": sigma_hat,
                "denoised": denoised,
            }
        )

    # Recombine guidance. If the hook never fired (e.g. CFG disabled),
    # fall back to the plain denoised output instead of crashing on None.
    if uncond_denoised is None:
        cfg_denoised = denoised
    else:
        cfg_denoised = uncond_denoised + (denoised - uncond_denoised) * current_cfg

    # Euler update of the half-res latent towards sigma_next.
    d = util.to_d(c, sigma_hat, cfg_denoised)
    c = c + d * (sigma_next - sigma_hat)

    # Scatter the refined pixels back into their 2x2 tiles.
    d_list = c.view(batch_size, channels, m * n, 1, 1)
    a_list[:, :, :, 1, 1] = d_list[:, :, :, 0, 0]
    x = (
        a_list.view(batch_size, channels, m, n, 2, 2)
        .permute(0, 1, 2, 4, 3, 5)
        .reshape(batch_size, channels, 2 * m, 2 * n)
    )

    if extra_row or extra_col:
        x_expanded = torch.zeros(original_shape, dtype=x.dtype, device=x.device)
        x_expanded[:, :, : 2 * m, : 2 * n] = x
        if extra_row:
            # extra_row_content was taken before the column slice, so it
            # spans the full original width (corner pixel included).
            x_expanded[:, :, -1:, : 2 * n + 1] = extra_row_content
        if extra_col:
            x_expanded[:, :, : 2 * m, -1:] = extra_col_content
        # BUG FIX: the original additionally overwrote the corner with
        # ``extra_col_content[:, :, -1:, :]`` when both an extra row and an
        # extra column existed — that is pixel (H-2, W-1), not the corner.
        # The corner is already correctly restored by the extra_row branch.
        x = x_expanded

    return x
282
+
283
+
284
@torch.no_grad()
def sample_euler_dy_cfg_pp(
    model,
    x,
    sigmas,
    extra_args=None,
    callback=None,
    disable=None,
    s_churn=0.0,
    s_tmin=0.0,
    s_tmax=float("inf"),
    s_noise=1.0,
    s_gamma_start=0.0,
    s_gamma_end=0.0,
    s_extra_steps=True,
    pipeline=False,
    # CFG++ parameters
    cfg_scale=7.5,
    cfg_x0_scale=1.0,
    cfg_s_scale=1.0,
    cfg_min=1.0,
    **kwargs,
):
    """Euler sampler with dynamic half-res refinement steps and CFG++ guidance.

    Args:
        model: Denoiser wrapper called as ``model(x, sigma, **extra_args)``.
        x: Initial noisy latent.
        sigmas: Noise schedule; ``len(sigmas) - 1`` sampling steps are run.
        extra_args: Extra args forwarded to the model (``model_options`` is
            extended with a post-CFG hook).
        callback: Optional per-step progress callback.
        disable: Passed to ``trange`` to disable the progress bar.
        s_churn / s_tmin / s_tmax / s_noise: Karras-style churn parameters.
        s_gamma_start / s_gamma_end: Churn window; values <= 1.0 are treated
            as fractions of the total step count.
        s_extra_steps: Whether to run the extra dynamic half-res steps.
        pipeline: When True, skip all GUI integration (progress, previews).
        cfg_scale / cfg_min: Guidance scale is ramped linearly from
            ``cfg_scale`` down to ``cfg_min`` over the run.
        cfg_x0_scale: Extra multiplier applied to the guidance coefficient
            after the first step.
        cfg_s_scale: Currently unused; kept for interface compatibility.

    Returns:
        The denoised latent.
    """
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
    gamma_start = (
        round(s_gamma_start)
        if s_gamma_start > 1.0
        else (len(sigmas) - 1) * s_gamma_start
    )
    gamma_end = (
        round(s_gamma_end) if s_gamma_end > 1.0 else (len(sigmas) - 1) * s_gamma_end
    )
    n_steps = len(sigmas) - 1

    # CFG++ scheduling: linear ramp from cfg_scale down to cfg_min.
    def get_cfg_scale(step):
        progress = step / n_steps
        return cfg_scale + (cfg_min - cfg_scale) * progress

    uncond_denoised = None
    old_uncond_denoised = None

    # BUG FIX: the original did not capture ``uncond_denoised`` here; instead
    # it manually invoked the registered hook after each model call with
    # {"uncond_denoised": None}, which returned the *conditional* output
    # (collapsing the CFG++ math to a no-op) and reset the stored momentum
    # state to None every step. Capture the real unconditional prediction
    # while the model runs.
    def post_cfg_function(args):
        nonlocal uncond_denoised
        uncond_denoised = args["uncond_denoised"]
        return args["denoised"]

    model_options = extra_args.get("model_options", {}).copy()
    extra_args["model_options"] = set_model_options_post_cfg_function(
        model_options, post_cfg_function, disable_cfg1_optimization=True
    )

    global disable_gui
    disable_gui = pipeline

    if not disable_gui:
        from modules.AutoEncoders import taesd
        from modules.user import app_instance

    for i in trange(len(sigmas) - 1, disable=disable):
        # Cooperative interruption from the GUI.
        if (
            not pipeline
            and hasattr(app_instance.app, "interrupt_flag")
            and app_instance.app.interrupt_flag
        ):
            return x

        if not pipeline:
            app_instance.app.progress.set(i / (len(sigmas) - 1))

        current_cfg = get_cfg_scale(i)

        gamma = (
            max(s_churn / (len(sigmas) - 1), 2**0.5 - 1)
            if gamma_start <= i < gamma_end and s_tmin <= sigmas[i] <= s_tmax
            else 0.0
        )
        sigma_hat = sigmas[i] * (gamma + 1)

        if gamma > 0:
            eps = torch.randn_like(x) * s_noise
            x = x + eps * (sigma_hat**2 - sigmas[i] ** 2) ** 0.5

        denoised = model(x, sigma_hat * s_in, **extra_args)
        # ``uncond_denoised`` was filled in by post_cfg_function during the
        # call above; it stays None when CFG is disabled.

        if callback is not None:
            callback(
                {
                    "x": x,
                    "i": i,
                    "sigma": sigmas[i],
                    "sigma_hat": sigma_hat,
                    "denoised": denoised,
                    "cfg_scale": current_cfg,
                }
            )

        # CFG++ guidance.
        if uncond_denoised is None:
            # No unconditional prediction available — use plain denoised.
            cfg_denoised = denoised
        elif old_uncond_denoised is None:
            # First step — regular CFG.
            cfg_denoised = uncond_denoised + (denoised - uncond_denoised) * current_cfg
        else:
            # Subsequent steps — apply the CFG++ x0 coefficient.
            x0_coeff = cfg_x0_scale * current_cfg
            cfg_denoised = uncond_denoised + (denoised - uncond_denoised) * x0_coeff

        # Euler method with the guided denoised result.
        d = util.to_d(x, sigma_hat, cfg_denoised)
        x = x + d * (sigmas[i + 1] - sigma_hat)

        # Store for the next step's branch selection.
        old_uncond_denoised = uncond_denoised

        # Extra dynamic half-res step.
        if sigmas[i + 1] > 0 and s_extra_steps:
            # NOTE(review): ``i // 2 == 1`` fires only at i == 2 and i == 3;
            # if "every other step" was intended this should be
            # ``i % 2 == 1``. Kept as-is to preserve behavior — TODO confirm.
            if i // 2 == 1:
                x = dy_sampling_step_cfg_pp(
                    x,
                    model,
                    sigmas[i + 1],
                    i,
                    sigmas[i],
                    sigma_hat,
                    callback,
                    current_cfg=current_cfg,
                    cfg_x0_scale=cfg_x0_scale,
                    **extra_args,
                )

        # Live preview every 5 steps when the GUI previewer is enabled.
        if not pipeline and app_instance.app.previewer_var.get() and i % 5 == 0:
            threading.Thread(target=taesd.taesd_preview, args=(x,)).start()

    return x
428
+
429
+
430
@torch.no_grad()
def sample_euler_ancestral_dy_cfg_pp(
    model,
    x,
    sigmas,
    extra_args=None,
    callback=None,
    disable=None,
    eta=1.0,
    s_noise=1.0,
    noise_sampler=None,
    s_gamma_start=0.0,
    s_gamma_end=0.0,
    pipeline=False,
    # CFG++ parameters
    cfg_scale=7.5,
    cfg_x0_scale=1.0,
    cfg_s_scale=1.0,
    cfg_min=1.0,
    **kwargs,
):
    """Euler-ancestral sampler with CFG++ guidance and churn window.

    Args:
        model: Denoiser wrapper called as ``model(x, sigma, **extra_args)``.
        x: Initial noisy latent.
        sigmas: Noise schedule; ``len(sigmas) - 1`` sampling steps are run.
        extra_args: Extra args forwarded to the model (``model_options`` is
            extended with a post-CFG hook).
        callback: Optional per-step progress callback.
        disable: Passed to ``trange`` to disable the progress bar.
        eta: Ancestral step strength (0 = deterministic).
        s_noise: Scale of injected noise.
        noise_sampler: Optional noise source; defaults to
            ``sampling_util.default_noise_sampler(x)``.
        s_gamma_start / s_gamma_end: Churn window; values <= 1.0 are treated
            as fractions of the total step count.
        pipeline: When True, skip all GUI integration (progress, previews).
        cfg_scale / cfg_min: Guidance scale is ramped linearly from
            ``cfg_scale`` down to ``cfg_min`` over the run.
        cfg_x0_scale: Extra multiplier applied to the guidance coefficient
            after the first step.
        cfg_s_scale: Currently unused; kept for interface compatibility.

    Returns:
        The denoised latent.
    """
    extra_args = {} if extra_args is None else extra_args
    noise_sampler = (
        sampling_util.default_noise_sampler(x)
        if noise_sampler is None
        else noise_sampler
    )
    gamma_start = (
        round(s_gamma_start)
        if s_gamma_start > 1.0
        else (len(sigmas) - 1) * s_gamma_start
    )
    gamma_end = (
        round(s_gamma_end) if s_gamma_end > 1.0 else (len(sigmas) - 1) * s_gamma_end
    )
    n_steps = len(sigmas) - 1

    # CFG++ scheduling: linear ramp from cfg_scale down to cfg_min.
    def get_cfg_scale(step):
        progress = step / n_steps
        return cfg_scale + (cfg_min - cfg_scale) * progress

    uncond_denoised = None
    old_uncond_denoised = None

    # BUG FIX: the original did not capture ``uncond_denoised`` here; instead
    # it manually invoked the registered hook after each model call with
    # {"uncond_denoised": None}, which returned the *conditional* output
    # (collapsing the CFG++ math to a no-op) and reset the stored momentum
    # state to None every step. Capture the real unconditional prediction
    # while the model runs.
    def post_cfg_function(args):
        nonlocal uncond_denoised
        uncond_denoised = args["uncond_denoised"]
        return args["denoised"]

    model_options = extra_args.get("model_options", {}).copy()
    extra_args["model_options"] = set_model_options_post_cfg_function(
        model_options, post_cfg_function, disable_cfg1_optimization=True
    )

    global disable_gui
    disable_gui = pipeline

    if not disable_gui:
        from modules.AutoEncoders import taesd
        from modules.user import app_instance

    s_in = x.new_ones([x.shape[0]])
    for i in trange(len(sigmas) - 1, disable=disable):
        # Cooperative interruption from the GUI.
        if (
            not pipeline
            and hasattr(app_instance.app, "interrupt_flag")
            and app_instance.app.interrupt_flag
        ):
            return x

        if not pipeline:
            app_instance.app.progress.set(i / (len(sigmas) - 1))

        current_cfg = get_cfg_scale(i)

        gamma = 2**0.5 - 1 if gamma_start <= i < gamma_end else 0.0
        sigma_hat = sigmas[i] * (gamma + 1)

        if gamma > 0:
            eps = torch.randn_like(x) * s_noise
            x = x + eps * (sigma_hat**2 - sigmas[i] ** 2) ** 0.5

        denoised = model(x, sigma_hat * s_in, **extra_args)
        # ``uncond_denoised`` was filled in by post_cfg_function during the
        # call above; it stays None when CFG is disabled.

        sigma_down, sigma_up = sampling_util.get_ancestral_step(
            sigmas[i], sigmas[i + 1], eta=eta
        )

        if callback is not None:
            callback(
                {
                    "x": x,
                    "i": i,
                    "sigma": sigmas[i],
                    "sigma_hat": sigma_hat,
                    "denoised": denoised,
                    "cfg_scale": current_cfg,
                }
            )

        # CFG++ guidance.
        if uncond_denoised is None:
            # No unconditional prediction available — use plain denoised.
            cfg_denoised = denoised
        elif old_uncond_denoised is None or sigmas[i + 1] == 0:
            # First step or last step — regular CFG.
            cfg_denoised = uncond_denoised + (denoised - uncond_denoised) * current_cfg
        else:
            # Subsequent steps — apply the CFG++ x0 coefficient.
            x0_coeff = cfg_x0_scale * current_cfg
            cfg_denoised = uncond_denoised + (denoised - uncond_denoised) * x0_coeff

        # Euler-ancestral update with the guided denoised result.
        d = util.to_d(x, sigma_hat, cfg_denoised)
        x = x + d * (sigma_down - sigma_hat)

        if sigmas[i + 1] > 0:
            x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up

        # Store for the next step's branch selection.
        old_uncond_denoised = uncond_denoised

        # Live preview every 5 steps when the GUI previewer is enabled.
        if not pipeline and app_instance.app.previewer_var.get() and i % 5 == 0:
            threading.Thread(target=taesd.taesd_preview, args=(x,)).start()

    return x
564
+
565
+
566
  def set_model_options_post_cfg_function(
567
  model_options, post_cfg_function, disable_cfg1_optimization=False
568
  ):
modules/sample/sampling.py CHANGED
@@ -517,14 +517,14 @@ def ksampler(
517
  if sampler_name == "dpmpp_2m_cfgpp":
518
  sampler_function = samplers.sample_dpmpp_2m_cfgpp
519
 
520
- elif sampler_name == "euler_ancestral":
521
- sampler_function = samplers.sample_euler_ancestral
522
 
523
  elif sampler_name == "dpmpp_sde_cfgpp":
524
  sampler_function = samplers.sample_dpmpp_sde_cfgpp
525
 
526
- elif sampler_name == "euler":
527
- sampler_function = samplers.sample_euler
528
 
529
  else:
530
  # Default fallback
 
517
  if sampler_name == "dpmpp_2m_cfgpp":
518
  sampler_function = samplers.sample_dpmpp_2m_cfgpp
519
 
520
+ elif sampler_name == "euler_ancestral_cfgpp":
521
+ sampler_function = samplers.sample_euler_ancestral_dy_cfg_pp
522
 
523
  elif sampler_name == "dpmpp_sde_cfgpp":
524
  sampler_function = samplers.sample_dpmpp_sde_cfgpp
525
 
526
+ elif sampler_name == "euler_cfgpp":
527
+ sampler_function = samplers.sample_euler_dy_cfg_pp
528
 
529
  else:
530
  # Default fallback
modules/user/GUI.py CHANGED
@@ -779,7 +779,7 @@ class App(tk.Tk):
779
  seed=random.randint(1, 2**64),
780
  steps=10,
781
  cfg=8,
782
- sampler_name="euler_ancestral",
783
  scheduler="normal",
784
  denoise=0.45,
785
  model=hidiffoptimizer.go(
@@ -997,7 +997,7 @@ class App(tk.Tk):
997
  seed=random.randint(1, 2**64),
998
  steps=20,
999
  cfg=1,
1000
- sampler_name="euler",
1001
  scheduler="beta",
1002
  denoise=1,
1003
  model=unetloadergguf_10[0],
 
779
  seed=random.randint(1, 2**64),
780
  steps=10,
781
  cfg=8,
782
+ sampler_name="euler_ancestral_cfgpp",
783
  scheduler="normal",
784
  denoise=0.45,
785
  model=hidiffoptimizer.go(
 
997
  seed=random.randint(1, 2**64),
998
  steps=20,
999
  cfg=1,
1000
+ sampler_name="euler_cfgpp",
1001
  scheduler="beta",
1002
  denoise=1,
1003
  model=unetloadergguf_10[0],
modules/user/pipeline.py CHANGED
@@ -218,7 +218,7 @@ def pipeline(
218
  seed=random.randint(1, 2**64),
219
  steps=20,
220
  cfg=1,
221
- sampler_name="euler",
222
  scheduler="beta",
223
  denoise=1,
224
  model=unetloadergguf_10[0],
@@ -313,7 +313,7 @@ def pipeline(
313
  seed=random.randint(1, 2**64),
314
  steps=10,
315
  cfg=8,
316
- sampler_name="euler_ancestral",
317
  scheduler="normal",
318
  denoise=0.45,
319
  model=hidiffoptimizer.go(
 
218
  seed=random.randint(1, 2**64),
219
  steps=20,
220
  cfg=1,
221
+ sampler_name="euler_cfgpp",
222
  scheduler="beta",
223
  denoise=1,
224
  model=unetloadergguf_10[0],
 
313
  seed=random.randint(1, 2**64),
314
  steps=10,
315
  cfg=8,
316
+ sampler_name="euler_ancestral_cfgpp",
317
  scheduler="normal",
318
  denoise=0.45,
319
  model=hidiffoptimizer.go(