Update app.py
app.py CHANGED
@@ -7,6 +7,7 @@ import time
 from diffusers import DiffusionPipeline, AutoencoderTiny
 from diffusers.models.attention_processor import AttnProcessor2_0
 from custom_pipeline import FluxWithCFGPipeline
+from huggingface_hub import hf_hub_download
 
 torch.backends.cuda.matmul.allow_tf32 = True
 
@@ -19,18 +20,26 @@ DEFAULT_INFERENCE_STEPS = 1
 
 # Device and model setup
 dtype = torch.float16
+# Download the LoRA weights using hf_hub_download
+lora_weights_path = hf_hub_download(
+    repo_id="hugovntr/flux-schnell-realism",
+    filename="schnell-realism_v2.3.safetensors",
+)
+
 pipe = FluxWithCFGPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
 )
 pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype)
 pipe.to("cuda")
-
+
+# Load the LoRA weights using the downloaded path
+pipe.load_lora_weights(lora_weights_path, adapter_name="better")
 pipe.set_adapters(["better"], adapter_weights=[1.0])
 pipe.fuse_lora(adapter_name=["better"], lora_scale=1.0)
 pipe.unload_lora_weights()
 
 # Memory optimizations
-pipe.
+pipe.transformer.to(memory_format=torch.channels_last)  # Channels last
 pipe.enable_xformers_memory_efficient_attention()  # Flash Attention
 
 # CUDA Graph setup
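For applying the same change outside this Space, the pattern the commit follows is: fetch the LoRA file explicitly with hf_hub_download, load it as a named adapter, bake it into the base weights with fuse_lora, then drop the adapter bookkeeping with unload_lora_weights. Below is a minimal standalone sketch of that pattern; it substitutes the stock diffusers FluxPipeline for the Space's custom FluxWithCFGPipeline, and the prompt, step count, and output path are illustrative placeholders, not part of the commit.

import torch
from diffusers import FluxPipeline, AutoencoderTiny
from huggingface_hub import hf_hub_download

dtype = torch.float16

# Fetch the LoRA checkpoint into the local HF cache and get its path
lora_path = hf_hub_download(
    repo_id="hugovntr/flux-schnell-realism",
    filename="schnell-realism_v2.3.safetensors",
)

# Stock FluxPipeline stands in for the Space's custom FluxWithCFGPipeline
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
)
pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype)
pipe.to("cuda")

# Load as a named adapter, fuse it into the base weights, then unload
# so no LoRA layers (and no adapter overhead) remain at inference time
pipe.load_lora_weights(lora_path, adapter_name="better")
pipe.set_adapters(["better"], adapter_weights=[1.0])
pipe.fuse_lora(lora_scale=1.0)
pipe.unload_lora_weights()

# Memory optimizations mirroring the Space's app.py
# (enable_xformers_memory_efficient_attention requires xformers installed)
pipe.transformer.to(memory_format=torch.channels_last)
pipe.enable_xformers_memory_efficient_attention()

# Placeholder generation call; schnell is tuned for few steps, no CFG
image = pipe(
    "a photorealistic portrait", num_inference_steps=4, guidance_scale=0.0
).images[0]
image.save("out.png")

Fusing before unloading is what lets the later memory-format and attention optimizations operate on a single set of baked-in weights rather than on separate per-adapter layers.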