prithivMLmods committed (verified)
Commit d858274 · 1 Parent(s): a58ff01

This compiles the repeated transformer blocks for ~2x faster initialization.
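The core of the change is diffusers' `compile_repeated_blocks` helper, which compiles the repeated transformer blocks and reuses the compiled region instead of tracing the whole model. A minimal sketch of the pattern, assuming a recent diffusers release where `ModelMixin.compile_repeated_blocks` is available (the diff below applies the same call in the Space's app.py):

```python
# Sketch only; not the Space's full app.py.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "Qwen/Qwen-Image", torch_dtype=torch.bfloat16
).to("cuda")

# Full-model compilation would trace the entire transformer, so the first
# call pays the full compile cost:
# pipe.transformer = torch.compile(pipe.transformer, fullgraph=True)

# Regional compilation (what this commit adds): compile the repeated
# transformer blocks and reuse the compiled region for every block,
# which is where the faster cold start comes from.
pipe.transformer.compile_repeated_blocks(fullgraph=True)
```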

Files changed (1): app.py (+8, -5)
app.py CHANGED
@@ -5,14 +5,13 @@ from PIL import Image
  from diffusers import DiffusionPipeline
  import random
  import uuid
- from typing import Union, List, Optional
  import numpy as np
  import time
  import zipfile
  import os
 
  # Description for the app
- DESCRIPTION = """## Qwen Image Hpc/."""
+ DESCRIPTION = """## Qwen Image HPC/."""
 
  # Helper functions
  def save_image(img):
@@ -28,12 +27,16 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
  MAX_SEED = np.iinfo(np.int32).max
  MAX_IMAGE_SIZE = 2048
 
- # Load Qwen/Qwen-Image pipeline
+ # Load Qwen/Qwen-Image pipeline with regional compilation
  dtype = torch.bfloat16
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
- # --- Model Loading ---
- pipe_qwen = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image", torch_dtype=dtype).to(device)
+ # --- Model Loading with Regional Compilation ---
+ ckpt_id = "Qwen/Qwen-Image"
+ pipe_qwen = DiffusionPipeline.from_pretrained(
+     ckpt_id, torch_dtype=dtype
+ ).to(device)
+ pipe_qwen.transformer.compile_repeated_blocks(fullgraph=True)
 
  # Aspect ratios
  aspect_ratios = {
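Note that the compilation itself is lazy: the repeated blocks are compiled on the first forward pass, not at load time. A hypothetical warm-up call that could follow the loading code above (placeholder prompt; `pipe_qwen` and `device` are the names from the diff):

```python
# Hypothetical warm-up; parameter names follow the standard diffusers
# text-to-image API. Triggers compilation of the repeated blocks once
# so user-facing requests don't pay the compile cost.
_ = pipe_qwen(
    prompt="warm-up",          # placeholder prompt
    num_inference_steps=2,     # minimal steps; the output is discarded
    generator=torch.Generator(device=device).manual_seed(0),
)
```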