Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,7 @@ import time
|
|
| 7 |
from diffusers import DiffusionPipeline, AutoencoderTiny
|
| 8 |
from diffusers.models.attention_processor import AttnProcessor2_0
|
| 9 |
from custom_pipeline import FluxWithCFGPipeline
|
|
|
|
| 10 |
|
| 11 |
torch.backends.cuda.matmul.allow_tf32 = True
|
| 12 |
|
|
@@ -19,18 +20,26 @@ DEFAULT_INFERENCE_STEPS = 1
|
|
| 19 |
|
| 20 |
# Device and model setup
|
| 21 |
dtype = torch.float16
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
pipe = FluxWithCFGPipeline.from_pretrained(
|
| 23 |
"black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
|
| 24 |
)
|
| 25 |
pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype)
|
| 26 |
pipe.to("cuda")
|
| 27 |
-
|
|
|
|
|
|
|
| 28 |
pipe.set_adapters(["better"], adapter_weights=[1.0])
|
| 29 |
pipe.fuse_lora(adapter_names=["better"], lora_scale=1.0)
|
| 30 |
pipe.unload_lora_weights()
|
| 31 |
|
| 32 |
# Memory optimizations
|
| 33 |
-
pipe.
|
| 34 |
pipe.enable_xformers_memory_efficient_attention() # Flash Attention
|
| 35 |
|
| 36 |
# CUDA Graph setup
|
|
|
|
| 7 |
from diffusers import DiffusionPipeline, AutoencoderTiny
|
| 8 |
from diffusers.models.attention_processor import AttnProcessor2_0
|
| 9 |
from custom_pipeline import FluxWithCFGPipeline
|
| 10 |
+
from huggingface_hub import hf_hub_download
|
| 11 |
|
| 12 |
torch.backends.cuda.matmul.allow_tf32 = True
|
| 13 |
|
|
|
|
| 20 |
|
| 21 |
# Device and model setup
|
| 22 |
dtype = torch.float16
|
| 23 |
+
# Download the LoRA weights using hf_hub_download
|
| 24 |
+
lora_weights_path = hf_hub_download(
|
| 25 |
+
repo_id="hugovntr/flux-schnell-realism",
|
| 26 |
+
filename="schnell-realism_v2.3.safetensors",
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
pipe = FluxWithCFGPipeline.from_pretrained(
|
| 30 |
"black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
|
| 31 |
)
|
| 32 |
pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype)
|
| 33 |
pipe.to("cuda")
|
| 34 |
+
|
| 35 |
+
# Load the LoRA weights using the downloaded path
|
| 36 |
+
pipe.load_lora_weights(lora_weights_path, adapter_name="better")
|
| 37 |
pipe.set_adapters(["better"], adapter_weights=[1.0])
|
| 38 |
pipe.fuse_lora(adapter_names=["better"], lora_scale=1.0)
|
| 39 |
pipe.unload_lora_weights()
|
| 40 |
|
| 41 |
# Memory optimizations
|
| 42 |
+
pipe.transformer.to(memory_format=torch.channels_last) # Channels last — note: attribute is `transformer` (singular); `pipe.transformers` raises AttributeError and crashes the Space at startup
|
| 43 |
pipe.enable_xformers_memory_efficient_attention() # Flash Attention
|
| 44 |
|
| 45 |
# CUDA Graph setup
|