ford442 committed on
Commit
9c048b3
·
1 Parent(s): 44d5ad0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -0
app.py CHANGED
@@ -19,6 +19,9 @@ from typing import Tuple
19
  #from transformers import AutoTokenizer, AutoModelForCausalLM
20
  import paramiko
21
 
 
 
 
22
  #os.system("chmod +x ./cusparselt.sh")
23
  #os.system("./cusparselt.sh")
24
  #os.system("chmod +x ./cudnn.sh")
@@ -117,6 +120,10 @@ def load_and_prepare_model(model_id):
117
  add_watermarker=False,
118
  use_safetensors=True,
119
  ).to(torch.bfloat16).to('cuda')
 
 
 
 
120
  pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
121
  if ENABLE_CPU_OFFLOAD:
122
  pipe.enable_model_cpu_offload()
 
19
  #from transformers import AutoTokenizer, AutoModelForCausalLM
20
  import paramiko
21
 
22
+ from xformers.ops import MemoryEfficientAttentionFlashAttentionOp
23
+
24
+
25
  #os.system("chmod +x ./cusparselt.sh")
26
  #os.system("./cusparselt.sh")
27
  #os.system("chmod +x ./cudnn.sh")
 
120
  add_watermarker=False,
121
  use_safetensors=True,
122
  ).to(torch.bfloat16).to('cuda')
123
+ pipe.enable_xformers_memory_efficient_attention(attention_op=MemoryEfficientAttentionFlashAttentionOp)
124
+ # Workaround for not accepting attention shape using VAE for Flash Attention
125
+ pipe.vae.enable_xformers_memory_efficient_attention(attention_op=None)
126
+
127
  pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
128
  if ENABLE_CPU_OFFLOAD:
129
  pipe.enable_model_cpu_offload()