Update app.py
app.py CHANGED
@@ -19,6 +19,9 @@ from typing import Tuple
 #from transformers import AutoTokenizer, AutoModelForCausalLM
 import paramiko
 
+from xformers.ops import MemoryEfficientAttentionFlashAttentionOp
+
+
 #os.system("chmod +x ./cusparselt.sh")
 #os.system("./cusparselt.sh")
 #os.system("chmod +x ./cudnn.sh")
@@ -117,6 +120,10 @@ def load_and_prepare_model(model_id):
         add_watermarker=False,
         use_safetensors=True,
     ).to(torch.bfloat16).to('cuda')
+    pipe.enable_xformers_memory_efficient_attention(attention_op=MemoryEfficientAttentionFlashAttentionOp)
+    # Workaround: the VAE does not accept the attention shape used by Flash Attention
+    pipe.vae.enable_xformers_memory_efficient_attention(attention_op=None)
+
     pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
     if ENABLE_CPU_OFFLOAD:
         pipe.enable_model_cpu_offload()
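Taken together, the two hunks import xformers' Flash Attention op, route the pipeline's attention through it, and give the VAE a separate fallback kernel. A minimal, self-contained sketch of the same setup follows; the pipeline class and model id are assumptions for illustration (app.py may load a different model), and the ENABLE_CPU_OFFLOAD branch is omitted:

import torch
from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
from xformers.ops import MemoryEfficientAttentionFlashAttentionOp

# Assumed pipeline class and model id, for illustration only.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    add_watermarker=False,
    use_safetensors=True,
).to(torch.bfloat16).to('cuda')

# Run the UNet's attention through xformers' Flash Attention kernel.
pipe.enable_xformers_memory_efficient_attention(
    attention_op=MemoryEfficientAttentionFlashAttentionOp
)
# Per the diffusers docs, the Flash Attention op cannot handle the VAE's
# attention shape, so enable the memory-efficient path on the VAE with
# attention_op=None and let xformers pick a compatible kernel.
pipe.vae.enable_xformers_memory_efficient_attention(attention_op=None)

pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

This is why the diff calls enable_xformers_memory_efficient_attention twice: once on the pipeline with the Flash Attention op, and once on pipe.vae without it, rather than enabling a single op for the whole model.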