Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -6,87 +6,18 @@ import json
 import torch
 import spaces
 
-from
-from diffusers import (
-    AutoencoderKL,
-    SD3Transformer2DModel,
-    StableDiffusion3Pipeline,
-    FlowMatchEulerDiscreteScheduler
-)
-from diffusers.loaders.single_file_utils import (
-    convert_sd3_transformer_checkpoint_to_diffusers,
-)
-from transformers import (
-    CLIPTextModelWithProjection,
-    CLIPTokenizer,
-    T5EncoderModel,
-    T5Tokenizer
-)
-from accelerate import init_empty_weights
-from accelerate.utils import set_module_tensor_to_device
-from safetensors import safe_open
+from diffusers import Lumina2Text2ImgPipeline
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
-model_repo_id = "
-
-finetune_filename = "Absynth_SD3.5L_2.0.safetensors"
+model_repo_id = "Alpha-VLLM/Lumina-Image-2.0"
+
 
 if torch.cuda.is_available():
     torch_dtype = torch.bfloat16
 else:
     torch_dtype = torch.float32
 
-
-config_file = hf_hub_download(repo_id=model_repo_id, filename="transformer/config.json")
-with open(config_file, "r") as fp:
-    config = json.load(fp)
-with init_empty_weights():
-    transformer = SD3Transformer2DModel.from_config(config)
-
-# Get transformer state dict and load
-model_file = hf_hub_download(repo_id=finetune_repo_id, filename=finetune_filename)
-state_dict = {}
-with safe_open(model_file, framework="pt") as f:
-    for key in f.keys():
-        state_dict[key] = f.get_tensor(key)
-
-state_dict = convert_sd3_transformer_checkpoint_to_diffusers(state_dict)
-for key, value in state_dict.items():
-    set_module_tensor_to_device(
-        transformer,
-        key,
-        device,
-        value=value,
-        dtype=torch_dtype
-    )
-
-# Try to keep memory usage down
-del state_dict
-gc.collect()
-
-# Initialize models from base SD3.5
-vae = AutoencoderKL.from_pretrained(model_repo_id, subfolder="vae")
-text_encoder = CLIPTextModelWithProjection.from_pretrained(model_repo_id, subfolder="text_encoder")
-text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(model_repo_id, subfolder="text_encoder_2")
-text_encoder_3 = T5EncoderModel.from_pretrained(model_repo_id, subfolder="text_encoder_3")
-tokenizer = CLIPTokenizer.from_pretrained(model_repo_id, subfolder="tokenizer")
-tokenizer_2 = CLIPTokenizer.from_pretrained(model_repo_id, subfolder="tokenizer_2")
-tokenizer_3 = T5Tokenizer.from_pretrained(model_repo_id, subfolder="tokenizer_3")
-scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(model_repo_id, subfolder="scheduler")
-
-# Create pipeline from our models
-pipe = StableDiffusion3Pipeline(
-    vae=vae,
-    scheduler=scheduler,
-    text_encoder=text_encoder,
-    text_encoder_2=text_encoder_2,
-    text_encoder_3=text_encoder_3,
-    tokenizer=tokenizer,
-    tokenizer_2=tokenizer_2,
-    tokenizer_3=tokenizer_3,
-    transformer=transformer
-)
-pipe = pipe.to(device, dtype=torch_dtype)
+pipe = Lumina2Text2ImgPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1536
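Note: the removed block above assembled the SD3.5 pipeline by hand (empty-weight transformer, single-file checkpoint conversion, per-component encoders and tokenizers), while the new version loads everything with a single from_pretrained call. For reference, a minimal standalone sketch of the new loading path; the generate() helper, its prompt, and the explicit device move are illustrative assumptions, while the repo id, dtype choice, and call arguments come from this commit.

import torch
from diffusers import Lumina2Text2ImgPipeline  # newer diffusers releases also expose this as Lumina2Pipeline

model_repo_id = "Alpha-VLLM/Lumina-Image-2.0"
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

# One call replaces the former per-component SD3.5 assembly.
pipe = Lumina2Text2ImgPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
pipe = pipe.to(device)  # assumption: the Space itself may handle placement via ZeroGPU instead

def generate(prompt: str):  # hypothetical helper, not part of app.py
    # Defaults mirror the values this commit wires into the Gradio UI.
    return pipe(
        prompt=prompt,
        width=1024,
        height=1024,
        num_inference_steps=50,
        guidance_scale=4.0,
        cfg_normalization=True,
        cfg_trunc_ratio=0.25,
    ).images[0]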
@@ -99,8 +30,10 @@ def infer(
     randomize_seed=False,
     width=1024,
     height=1024,
-    guidance_scale=4.
-    num_inference_steps=
+    guidance_scale=4.0,
+    num_inference_steps=50,
+    cfg_normalization=True,
+    cfg_trunc_ratio=0.25,
     progress=gr.Progress(track_tqdm=True),
 ):
     if randomize_seed:
@@ -115,6 +48,8 @@ def infer(
         num_inference_steps=num_inference_steps,
         width=width,
         height=height,
+        cfg_normalization=cfg_normalization,
+        cfg_trunc_ratio=cfg_trunc_ratio,
         generator=generator,
     ).images[0]
 
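The two kwargs forwarded here are Lumina2-specific guidance controls exposed by the diffusers pipeline: cfg_normalization toggles normalized classifier-free guidance, and cfg_trunc_ratio sets the fraction of the denoising schedule over which guidance is applied before falling back to the conditional prediction alone (my reading of the pipeline; the diffusers docstring is authoritative). A hypothetical direct call with this commit's defaults, reusing the pipe object loaded above:

image = pipe(
    prompt="a lighthouse at dawn, volumetric light",  # illustrative prompt
    num_inference_steps=50,
    guidance_scale=4.0,
    cfg_normalization=True,   # normalized classifier-free guidance on
    cfg_trunc_ratio=0.25,     # guidance only over the early part of the schedule
).images[0]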
@@ -122,7 +57,7 @@ def infer(
 
 
 examples = [
-    "
+    "A serene photograph capturing the golden reflection of the sun on a vast expanse of water. The sun is positioned at the top center, casting a brilliant, shimmering trail of light across the rippling surface. The water is textured with gentle waves, creating a rhythmic pattern that leads the eye towards the horizon. The entire scene is bathed in warm, golden hues, enhancing the tranquil and meditative atmosphere. High contrast, natural lighting, golden hour, photorealistic, expansive composition, reflective surface, peaceful, visually harmonious.",
 ]
 
 css = """
@@ -134,8 +69,7 @@ css = """
 
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # [
-        gr.Markdown("Finetuned from [Stable Diffusion 3.5 Large (8B)](https://huggingface.co/stabilityai/stable-diffusion-3.5-large) by [Stability AI](https://stability.ai/news/introducing-stable-diffusion-3-5).")
+        gr.Markdown(" # [Lumina Image v2.0](https://huggingface.co/Alpha-VLLM/Lumina-Image-2.0) by [Alpha-VLLM](https://huggingface.co/Alpha-VLLM)")
         with gr.Row():
             prompt = gr.Text(
                 label="Prompt",
@@ -163,7 +97,21 @@ with gr.Blocks(css=css) as demo:
                step=1,
                value=0,
            )
-
+
+            with gr.Row():
+                cfg_normalization = gr.Checkbox(
+                    label="CFG Normalization",
+                    value=True
+                )
+
+                cfg_trunc_ratio = gr.Slider(
+                    label="CFG Truncation Ratio",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.05,
+                    value=0.25,
+                )
+
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
 
            with gr.Row():
@@ -172,7 +120,7 @@ with gr.Blocks(css=css) as demo:
                    minimum=512,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
-                    value=
+                    value=1024,
                )
 
                height = gr.Slider(
@@ -180,7 +128,7 @@ with gr.Blocks(css=css) as demo:
                    minimum=512,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
-                    value=
+                    value=1024,
                )
 
            with gr.Row():
@@ -189,15 +137,15 @@ with gr.Blocks(css=css) as demo:
                    minimum=0.0,
                    maximum=7.5,
                    step=0.1,
-                    value=4.
+                    value=4.0,
                )
 
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
-                    maximum=
+                    maximum=100,
                    step=1,
-                    value=
+                    value=50,
                )
 
        gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=True, cache_mode="lazy")
@@ -214,6 +162,8 @@ with gr.Blocks(css=css) as demo:
            height,
            guidance_scale,
            num_inference_steps,
+            cfg_normalization,
+            cfg_trunc_ratio,
        ],
        outputs=[result, seed],
    )
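For the new controls to reach infer, the final hunk appends them to the positional inputs list in the same order as the new parameters in infer's signature. A sketch of the likely shape of that (mostly unchanged) event wiring; the run_button trigger name and the elided inputs are assumptions, since only the tail of the list appears in the diff:

run_button.click(  # assumed trigger; only the inputs/outputs tail is visible in the hunk
    fn=infer,
    inputs=[
        prompt,
        # ... other inputs unchanged and not shown in the hunk ...
        width,
        height,
        guidance_scale,
        num_inference_steps,
        cfg_normalization,  # new Checkbox
        cfg_trunc_ratio,    # new Slider
    ],
    outputs=[result, seed],
)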