benjamin-paine committed on
Commit add09dc · verified · 1 Parent(s): 0a55839

Update app.py

Files changed (1)
  1. app.py +34 -84
app.py CHANGED
@@ -6,87 +6,18 @@ import json
 import torch
 import spaces
 
-from huggingface_hub import hf_hub_download
-from diffusers import (
-    AutoencoderKL,
-    SD3Transformer2DModel,
-    StableDiffusion3Pipeline,
-    FlowMatchEulerDiscreteScheduler
-)
-from diffusers.loaders.single_file_utils import (
-    convert_sd3_transformer_checkpoint_to_diffusers,
-)
-from transformers import (
-    CLIPTextModelWithProjection,
-    CLIPTokenizer,
-    T5EncoderModel,
-    T5Tokenizer
-)
-from accelerate import init_empty_weights
-from accelerate.utils import set_module_tensor_to_device
-from safetensors import safe_open
+from diffusers import Lumina2Text2ImgPipeline
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
-model_repo_id = "stabilityai/stable-diffusion-3.5-large"
-finetune_repo_id = "DoctorDiffusion/Absynth-2.0"
-finetune_filename = "Absynth_SD3.5L_2.0.safetensors"
+model_repo_id = "Alpha-VLLM/Lumina-Image-2.0"
+
 
 if torch.cuda.is_available():
     torch_dtype = torch.bfloat16
 else:
     torch_dtype = torch.float32
 
-# Initialize transformer
-config_file = hf_hub_download(repo_id=model_repo_id, filename="transformer/config.json")
-with open(config_file, "r") as fp:
-    config = json.load(fp)
-with init_empty_weights():
-    transformer = SD3Transformer2DModel.from_config(config)
-
-# Get transformer state dict and load
-model_file = hf_hub_download(repo_id=finetune_repo_id, filename=finetune_filename)
-state_dict = {}
-with safe_open(model_file, framework="pt") as f:
-    for key in f.keys():
-        state_dict[key] = f.get_tensor(key)
-
-state_dict = convert_sd3_transformer_checkpoint_to_diffusers(state_dict)
-for key, value in state_dict.items():
-    set_module_tensor_to_device(
-        transformer,
-        key,
-        device,
-        value=value,
-        dtype=torch_dtype
-    )
-
-# Try to keep memory usage down
-del state_dict
-gc.collect()
-
-# Initialize models from base SD3.5
-vae = AutoencoderKL.from_pretrained(model_repo_id, subfolder="vae")
-text_encoder = CLIPTextModelWithProjection.from_pretrained(model_repo_id, subfolder="text_encoder")
-text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(model_repo_id, subfolder="text_encoder_2")
-text_encoder_3 = T5EncoderModel.from_pretrained(model_repo_id, subfolder="text_encoder_3")
-tokenizer = CLIPTokenizer.from_pretrained(model_repo_id, subfolder="tokenizer")
-tokenizer_2 = CLIPTokenizer.from_pretrained(model_repo_id, subfolder="tokenizer_2")
-tokenizer_3 = T5Tokenizer.from_pretrained(model_repo_id, subfolder="tokenizer_3")
-scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(model_repo_id, subfolder="scheduler")
-
-# Create pipeline from our models
-pipe = StableDiffusion3Pipeline(
-    vae=vae,
-    scheduler=scheduler,
-    text_encoder=text_encoder,
-    text_encoder_2=text_encoder_2,
-    text_encoder_3=text_encoder_3,
-    tokenizer=tokenizer,
-    tokenizer_2=tokenizer_2,
-    tokenizer_3=tokenizer_3,
-    transformer=transformer
-)
-pipe = pipe.to(device, dtype=torch_dtype)
+pipe = Lumina2Text2ImgPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1536
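Note: the deleted block above materialized an SD3.5 transformer on the meta device, streamed a single-file finetune checkpoint into it tensor by tensor, and assembled a StableDiffusion3Pipeline by hand. The Lumina repo ships in the standard diffusers multi-folder layout, so a single `from_pretrained` call replaces all of it. A minimal sketch of the new loading path as this commit uses it (the prompt and output filename are illustrative, and a CUDA device is assumed):

```python
import torch
from diffusers import Lumina2Text2ImgPipeline

# One call pulls the transformer, text encoder, VAE, and scheduler
# from the hosted repo; bfloat16 keeps memory use down on GPU.
pipe = Lumina2Text2ImgPipeline.from_pretrained(
    "Alpha-VLLM/Lumina-Image-2.0",
    torch_dtype=torch.bfloat16,
).to("cuda")

image = pipe(
    "a watercolor fox in a snowy forest",  # illustrative prompt
    num_inference_steps=50,
    guidance_scale=4.0,
).images[0]
image.save("example.png")
```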
@@ -99,8 +30,10 @@ def infer(
     randomize_seed=False,
     width=1024,
     height=1024,
-    guidance_scale=4.5,
-    num_inference_steps=40,
+    guidance_scale=4.0,
+    num_inference_steps=50,
+    cfg_normalization=True,
+    cfg_trunc_ratio=0.25,
     progress=gr.Progress(track_tqdm=True),
 ):
     if randomize_seed:
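Note: the two new keyword arguments are forwarded to the pipeline call in the next hunk. As I read the Lumina 2 pipeline, `cfg_trunc_ratio` truncates classifier-free guidance after an early fraction of the sampling trajectory, and `cfg_normalization` rescales the guided prediction back toward the conditional prediction's norm to curb over-saturation at higher scales. A rough, illustrative paraphrase of the per-step logic (the function and variable names are mine, and the exact truncation test in the real pipeline may differ):

```python
import torch

def guided_prediction(pred_cond, pred_uncond, guidance_scale,
                      progress, cfg_trunc_ratio=0.25, cfg_normalization=True):
    """Sketch of truncated, normalized classifier-free guidance.

    `progress` is the fraction of sampling completed so far (0.0 to 1.0).
    """
    if progress >= cfg_trunc_ratio:
        # Guidance truncated: late steps use the conditional branch alone.
        return pred_cond
    # Standard CFG: push the prediction away from the unconditional branch.
    pred = pred_uncond + guidance_scale * (pred_cond - pred_uncond)
    if cfg_normalization:
        # Rescale so the guided prediction keeps the conditional norm.
        cond_norm = torch.linalg.vector_norm(pred_cond, dim=-1, keepdim=True)
        pred_norm = torch.linalg.vector_norm(pred, dim=-1, keepdim=True)
        pred = pred * (cond_norm / pred_norm)
    return pred
```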
@@ -115,6 +48,8 @@ def infer(
         num_inference_steps=num_inference_steps,
         width=width,
         height=height,
+        cfg_normalization=cfg_normalization,
+        cfg_trunc_ratio=cfg_trunc_ratio,
         generator=generator,
     ).images[0]
 
@@ -122,7 +57,7 @@
 
 
 examples = [
-    "An astrounaut encounters an alien on the moon, photograph",
+    "A serene photograph capturing the golden reflection of the sun on a vast expanse of water. The sun is positioned at the top center, casting a brilliant, shimmering trail of light across the rippling surface. The water is textured with gentle waves, creating a rhythmic pattern that leads the eye towards the horizon. The entire scene is bathed in warm, golden hues, enhancing the tranquil and meditative atmosphere. High contrast, natural lighting, golden hour, photorealistic, expansive composition, reflective surface, peaceful, visually harmonious.",
 ]
 
 css = """
@@ -134,8 +69,7 @@ css = """
 
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # [Absynth 2.0](https://huggingface.co/DoctorDiffusion/Absynth-2.0) by [DoctorDiffusion](https://civitai.com/user/doctor_diffusion)")
-        gr.Markdown("Finetuned from [Stable Diffusion 3.5 Large (8B)](https://huggingface.co/stabilityai/stable-diffusion-3.5-large) by [Stability AI](https://stability.ai/news/introducing-stable-diffusion-3-5).")
+        gr.Markdown(" # [Lumina Image v2.0](https://huggingface.co/Alpha-VLLM/Lumina-Image-2.0) by [Alpha-VLLM](https://huggingface.co/Alpha-VLLM)")
         with gr.Row():
             prompt = gr.Text(
                 label="Prompt",
@@ -163,7 +97,21 @@ with gr.Blocks(css=css) as demo:
                 step=1,
                 value=0,
             )
-
+
+            with gr.Row():
+                cfg_normalization = gr.Checkbox(
+                    label="CFG Normalization",
+                    value=True
+                )
+
+                cfg_trunc_ratio = gr.Slider(
+                    label="CFG Truncation Ratio",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.05,
+                    value=0.25,
+                )
+
             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
 
             with gr.Row():
@@ -172,7 +120,7 @@ with gr.Blocks(css=css) as demo:
                     minimum=512,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
-                    value=768,
+                    value=1024,
                 )
 
                 height = gr.Slider(
@@ -180,7 +128,7 @@ with gr.Blocks(css=css) as demo:
                     minimum=512,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
-                    value=1344,
+                    value=1024,
                 )
 
             with gr.Row():
@@ -189,15 +137,15 @@ with gr.Blocks(css=css) as demo:
                     minimum=0.0,
                     maximum=7.5,
                     step=0.1,
-                    value=4.5,
+                    value=4.0,
                 )
 
                 num_inference_steps = gr.Slider(
                     label="Number of inference steps",
                     minimum=1,
-                    maximum=50,
+                    maximum=100,
                     step=1,
-                    value=40,
+                    value=50,
                 )
 
         gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=True, cache_mode="lazy")
@@ -214,6 +162,8 @@ with gr.Blocks(css=css) as demo:
             height,
             guidance_scale,
             num_inference_steps,
+            cfg_normalization,
+            cfg_trunc_ratio,
         ],
         outputs=[result, seed],
     )
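Note: Gradio maps `inputs` to the handler's parameters by position, so the two new entries must appear in the same order as the new keyword arguments in `infer`'s signature, as they do here. A self-contained sketch of the pattern with the widget set trimmed down (the placeholder handler body is illustrative, not the app's real inference code):

```python
import gradio as gr

def infer(prompt, cfg_normalization=True, cfg_trunc_ratio=0.25):
    # Placeholder body: echo the settings instead of running the pipeline.
    return f"{prompt!r} | normalization={cfg_normalization}, trunc={cfg_trunc_ratio}"

with gr.Blocks() as demo:
    prompt = gr.Text(label="Prompt")
    cfg_normalization = gr.Checkbox(label="CFG Normalization", value=True)
    cfg_trunc_ratio = gr.Slider(
        minimum=0.0, maximum=1.0, step=0.05, value=0.25,
        label="CFG Truncation Ratio",
    )
    result = gr.Textbox(label="Result")
    run_button = gr.Button("Run")
    # Widgets are matched to infer's parameters positionally, so this
    # list must stay in the same order as the function signature.
    run_button.click(
        fn=infer,
        inputs=[prompt, cfg_normalization, cfg_trunc_ratio],
        outputs=[result],
    )

if __name__ == "__main__":
    demo.launch()
```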
 