Futuretop committed on
Commit
5d40972
·
verified ·
1 Parent(s): 4932e29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -35,12 +35,17 @@ pipe = DiffusionPipeline.from_pretrained(
35
  ),
36
  tokenizer=tokenizer,
37
  feature_extractor=feature_extractor,
38
- torch_dtype=torch.bfloat16,
39
- use_safetensors=True,
40
- device_map="balanced", # automatically spreads submodules CPU/GPU
41
- offload_folder="offload" # where to spill CPU-offloaded weights
42
  )
43
 
 
 
 
 
 
 
44
  pipe = pipe.to(device)
45
 
46
  MAX_SEED = np.iinfo(np.int32).max
 
35
  ),
36
  tokenizer=tokenizer,
37
  feature_extractor=feature_extractor,
38
+ torch_dtype=torch.float16, # load weights in half-precision
39
+ revision="fp16", # if your repo provides fp16 weights
40
+ use_safetensors=True
 
41
  )
42
 
43
+ # 4) Memory savings hooks (all on your single GPU + CPU offload)
44
+ pipe.enable_attention_slicing() # slice big attention maps
45
+ pipe.enable_vae_slicing() # slice VAE decode
46
+ pipe.enable_xformers_memory_efficient_attention() # if xformers is installed
47
+ pipe.enable_model_cpu_offload() # offload idle submodules to CPU
48
+
49
  pipe = pipe.to(device)
50
 
51
  MAX_SEED = np.iinfo(np.int32).max