Upload 2 files
- app.py +5 -10
- custom_pipeline.py +17 -32
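
In short: app.py drops the self-managed GPU tuning (cudnn benchmark, xformers, channels-last layouts, torch.compile) in favor of Hugging Face ZeroGPU via `import spaces` and a `@spaces.GPU` decorator, and pins the pipeline dtype to torch.float16; custom_pipeline.py removes a dead import, global backend flags, and duplicated constants, and fills in the bodies of calculate_timestep_shift and prepare_timesteps.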
app.py
CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 import numpy as np
 import random
+import spaces
 import torch
 import time
 from diffusers import DiffusionPipeline, AutoencoderTiny
@@ -8,7 +9,6 @@ from diffusers.models.attention_processor import AttnProcessor2_0
 from custom_pipeline import FluxWithCFGPipeline
 
 torch.backends.cuda.matmul.allow_tf32 = True
-torch.backends.cudnn.benchmark = True
 
 # Constants
 MAX_SEED = np.iinfo(np.int32).max
@@ -18,7 +18,7 @@ DEFAULT_HEIGHT = 1024
 DEFAULT_INFERENCE_STEPS = 1
 
 # Device and model setup
-dtype = torch.
+dtype = torch.float16
 pipe = FluxWithCFGPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
 )
@@ -28,16 +28,11 @@ pipe.load_lora_weights('hugovntr/flux-schnell-realism', weight_name='schnell-rea
 pipe.set_adapters(["better"], adapter_weights=[1.0])
 pipe.fuse_lora(adapter_name=["better"], lora_scale=1.0)
 pipe.unload_lora_weights()
-pipe.enable_xformers_memory_efficient_attention()
-pipe.unet.to(memory_format=torch.channels_last)
-pipe.vae.to(memory_format=torch.channels_last)
-
-pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead")
-pipe.text_encoder = torch.compile(pipe.text_encoder, mode="reduce-overhead")
 
 torch.cuda.empty_cache()
 
 # Inference function
+@spaces.GPU(duration=25)
 def generate_image(prompt, seed=24, width=DEFAULT_WIDTH, height=DEFAULT_HEIGHT, randomize_seed=False, num_inference_steps=2, progress=gr.Progress(track_tqdm=True)):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
@@ -78,7 +73,7 @@ with gr.Blocks() as demo:
     with gr.Column(scale=2.5):
         result = gr.Image(label="Generated Image", show_label=False, interactive=False)
     with gr.Column(scale=1):
-        prompt = gr.
+        prompt = gr.Text(
             label="Prompt",
             placeholder="Describe the image you want to generate...",
             lines=3,
@@ -91,7 +86,7 @@ with gr.Blocks() as demo:
     with gr.Column("Advanced Options"):
         with gr.Row():
             realtime = gr.Checkbox(label="Realtime Toggler", info="If TRUE then uses more GPU but create image in realtime.", value=False)
-            latency = gr.
+            latency = gr.Text(label="Latency")
         with gr.Row():
             seed = gr.Number(label="Seed", value=42)
             randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
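
The decorated function is the heart of the app.py change: on ZeroGPU Spaces, hardware is attached only while a `@spaces.GPU`-decorated call runs, which is why the always-on optimizations (torch.compile, xformers, channels-last) were removed. A minimal sketch of the pattern, where `generate` and its body are illustrative stand-ins rather than the app's actual function:

import spaces
import torch
from diffusers import DiffusionPipeline

# Load once at import time; ZeroGPU defers real CUDA placement until a
# decorated call actually holds a GPU.
pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.float16
)
pipe.to("cuda")

@spaces.GPU(duration=25)  # request up to ~25 seconds of GPU time per call
def generate(prompt: str):
    # Runs with the GPU attached; the slice is released on return.
    return pipe(prompt, num_inference_steps=4).images[0]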
custom_pipeline.py
CHANGED
@@ -3,29 +3,20 @@ import numpy as np
 from diffusers import FluxPipeline, FlowMatchEulerDiscreteScheduler
 from typing import Any, Dict, List, Optional, Union
 from PIL import Image
-from torch.cuda import graphs
 
-#
-torch.backends.cuda.matmul.allow_tf32 = True
-torch.backends.cudnn.allow_tf32 = True
-torch.backends.cudnn.benchmark = True
-
-# Constants with optimized values
+# Constants for shift calculation
 BASE_SEQ_LEN = 256
 MAX_SEQ_LEN = 4096
 BASE_SHIFT = 0.5
 MAX_SHIFT = 1.2
-BATCH_SIZE = 4 # Optimal batch size for A100
 
-
+# Helper functions
 def calculate_timestep_shift(image_seq_len: int) -> float:
-
-    MAX_SEQ_LEN = 4096
-    BASE_SHIFT = 0.5
-    MAX_SHIFT = 1.2
+    """Calculates the timestep shift (mu) based on the image sequence length."""
     m = (MAX_SHIFT - BASE_SHIFT) / (MAX_SEQ_LEN - BASE_SEQ_LEN)
     b = BASE_SHIFT - m * BASE_SEQ_LEN
-
+    mu = image_seq_len * m + b
+    return mu
 
 def prepare_timesteps(
     scheduler: FlowMatchEulerDiscreteScheduler,
@@ -35,25 +26,19 @@ def prepare_timesteps(
     sigmas: Optional[List[float]] = None,
     mu: Optional[float] = None,
 ) -> (torch.Tensor, int):
-    """
-    if
-
-
-
-
-
-
-
-
-
-        scheduler.set_timesteps(sigmas=sigmas, device=device)
-    else:
-        scheduler.set_timesteps(num_inference_steps, device=device, mu=mu)
-
-    timesteps = scheduler.timesteps.to(memory_format=torch.channels_last)
+    """Prepares the timesteps for the diffusion process."""
+    if timesteps is not None and sigmas is not None:
+        raise ValueError("Only one of `timesteps` or `sigmas` can be passed.")
+
+    if timesteps is not None:
+        scheduler.set_timesteps(timesteps=timesteps, device=device)
+    elif sigmas is not None:
+        scheduler.set_timesteps(sigmas=sigmas, device=device)
+    else:
+        scheduler.set_timesteps(num_inference_steps, device=device, mu=mu)
+
+    timesteps = scheduler.timesteps
     num_inference_steps = len(timesteps)
-
     return timesteps, num_inference_steps
 
 # FLUX pipeline function
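
To make the rewritten helper concrete, here is a standalone, runnable copy of the shift calculation with endpoint checks; the 1024x1024 token count below assumes FLUX's 8x VAE downsampling and 2x2 latent patching:

BASE_SEQ_LEN = 256
MAX_SEQ_LEN = 4096
BASE_SHIFT = 0.5
MAX_SHIFT = 1.2

def calculate_timestep_shift(image_seq_len: int) -> float:
    """Linearly interpolate the shift mu between BASE_SHIFT and MAX_SHIFT."""
    m = (MAX_SHIFT - BASE_SHIFT) / (MAX_SEQ_LEN - BASE_SEQ_LEN)
    b = BASE_SHIFT - m * BASE_SEQ_LEN
    return image_seq_len * m + b

# The interpolation hits the constants exactly at the endpoints:
assert abs(calculate_timestep_shift(256) - 0.5) < 1e-9
assert abs(calculate_timestep_shift(4096) - 1.2) < 1e-9

# A 1024x1024 image yields (1024 / 8 / 2) ** 2 = 4096 latent tokens,
# so it receives the maximum shift:
print(calculate_timestep_shift(4096))  # 1.2

prepare_timesteps then passes this mu to scheduler.set_timesteps(num_inference_steps, device=device, mu=mu) whenever the caller supplies neither explicit timesteps nor sigmas, and raises a ValueError if both are given.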