# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gradio as gr
import spaces
import argparse
import inspect
import os
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn.functional as F
import numpy as np
import random
import warnings
from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
from utils import *
import hashlib
from diffusers.image_processor import VaeImageProcessor
from diffusers.loaders import (
    FromSingleFileMixin,
    LoraLoaderMixin,
    TextualInversionLoaderMixin,
)
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.attention_processor import (
    AttnProcessor2_0,
    LoRAAttnProcessor2_0,
    LoRAXFormersAttnProcessor,
    XFormersAttnProcessor,
)
from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import (
    is_accelerate_available,
    is_accelerate_version,
    is_invisible_watermark_available,
    logging,
    replace_example_docstring,
)
from diffusers.utils.torch_utils import randn_tensor
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
from diffusers.pipelines.stable_diffusion_xl import StableDiffusionXLPipelineOutput
from accelerate.utils import set_seed
from tqdm import tqdm

if is_invisible_watermark_available():
    # This file runs as a standalone Space app rather than inside the diffusers
    # package, so the package-relative `from .watermark import ...` would fail;
    # import the watermarker by its full diffusers path instead.
    from diffusers.pipelines.stable_diffusion_xl.watermark import StableDiffusionXLWatermarker

logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

EXAMPLE_DOC_STRING = """
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import StableDiffusionXLPipeline

        >>> pipe = StableDiffusionXLPipeline.from_pretrained(
        ...     "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
        ... )
        >>> pipe = pipe.to("cuda")

        >>> prompt = "a photo of an astronaut riding a horse on mars"
        >>> image = pipe(prompt).images[0]
        ```
"""

def gaussian_kernel(kernel_size=3, sigma=1.0, channels=3):
    # Build a 1D Gaussian, take its outer product to form the 2D kernel, then
    # replicate it per channel in the (channels, 1, k, k) layout expected by a
    # depthwise conv2d (groups=channels).
    x_coord = torch.arange(kernel_size)
    gaussian_1d = torch.exp(-(x_coord - (kernel_size - 1) / 2) ** 2 / (2 * sigma ** 2))
    gaussian_1d = gaussian_1d / gaussian_1d.sum()
    gaussian_2d = gaussian_1d[:, None] * gaussian_1d[None, :]
    kernel = gaussian_2d[None, None, :, :].repeat(channels, 1, 1, 1)
    return kernel


def gaussian_filter(latents, kernel_size=3, sigma=1.0):
    # Blur each latent channel independently (depthwise convolution); with an
    # odd kernel_size, padding=kernel_size // 2 preserves the spatial size.
    channels = latents.shape[1]
    kernel = gaussian_kernel(kernel_size, sigma, channels).to(latents.device, latents.dtype)
    blurred_latents = F.conv2d(latents, kernel, padding=kernel_size // 2, groups=channels)
    return blurred_latents
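
# Illustrative usage sketch (not part of the pipeline); the latent shape below
# is an assumption for demonstration, matching SDXL's 4-channel latent space:
#
#   latents = torch.randn(1, 4, 128, 128)   # (batch, channels, height, width)
#   smoothed = gaussian_filter(latents, kernel_size=3, sigma=1.0)
#   assert smoothed.shape == latents.shape  # "same" padding keeps the size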

# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
    """
    Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
    Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
    """
    std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
    std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
    # rescale the results from guidance (fixes overexposure)
    noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
    # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images
    noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg
    return noise_cfg
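
# In symbols, with phi = guidance_rescale and std(.) the per-sample standard
# deviation over all non-batch dimensions:
#
#   rescaled = noise_cfg * std(noise_pred_text) / std(noise_cfg)
#   output   = phi * rescaled + (1 - phi) * noise_cfg
#
# so phi = 0.0 leaves the CFG output unchanged and phi = 1.0 fully matches the
# statistics of the text-conditioned branch.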

class AccDiffusionSDXLPipeline(DiffusionPipeline, FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin):
    """
    Pipeline for text-to-image generation using Stable Diffusion XL.

    [class description omitted ...]
    """

    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"

    def __init__(
        self,
        vae: AutoencoderKL,
        text_encoder: CLIPTextModel,
        text_encoder_2: CLIPTextModelWithProjection,
        tokenizer: CLIPTokenizer,
        tokenizer_2: CLIPTokenizer,
        unet: UNet2DConditionModel,
        scheduler: KarrasDiffusionSchedulers,
        force_zeros_for_empty_prompt: bool = True,
        add_watermarker: Optional[bool] = None,
    ):
        super().__init__()

        self.register_modules(
            vae=vae,
            text_encoder=text_encoder,
            text_encoder_2=text_encoder_2,
            tokenizer=tokenizer,
            tokenizer_2=tokenizer_2,
            unet=unet,
            scheduler=scheduler,
        )
        self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
        # For the standard SDXL VAE (four down blocks) this gives a spatial
        # downscale factor of 8 between pixel space and latent space.
        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
        self.default_sample_size = self.unet.config.sample_size

        add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available()

        if add_watermarker:
            self.watermark = StableDiffusionXLWatermarker()
        else:
            self.watermark = None

    # (the remaining methods of the original pipeline are omitted here ...)

    def __call__(
        self,
        prompt: Union[str, List[str]] = None,
        prompt_2: Optional[Union[str, List[str]]] = None,
        height: Optional[int] = None,
        width: Optional[int] = None,
        num_inference_steps: int = 50,
        denoising_end: Optional[float] = None,
        guidance_scale: float = 5.0,
        negative_prompt: Optional[Union[str, List[str]]] = None,
        negative_prompt_2: Optional[Union[str, List[str]]] = None,
        num_images_per_prompt: Optional[int] = 1,
        eta: float = 0.0,
        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
        latents: Optional[torch.FloatTensor] = None,
        prompt_embeds: Optional[torch.FloatTensor] = None,
        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
        pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
        negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = False,
        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
        callback_steps: int = 1,
        cross_attention_kwargs: Optional[Dict[str, Any]] = None,
        guidance_rescale: float = 0.0,
        original_size: Optional[Tuple[int, int]] = None,
        crops_coords_top_left: Tuple[int, int] = (0, 0),
        target_size: Optional[Tuple[int, int]] = None,
        negative_original_size: Optional[Tuple[int, int]] = None,
        negative_crops_coords_top_left: Tuple[int, int] = (0, 0),
        negative_target_size: Optional[Tuple[int, int]] = None,
        ################### AccDiffusion specific parameters ####################
        image_lr: Optional[torch.FloatTensor] = None,
        view_batch_size: int = 16,
        multi_decoder: bool = True,
        stride: Optional[int] = 64,
        cosine_scale_1: Optional[float] = 3.,
        cosine_scale_2: Optional[float] = 1.,
        cosine_scale_3: Optional[float] = 1.,
        sigma: Optional[float] = 1.0,
        lowvram: bool = False,
        multi_guidance_scale: Optional[float] = 7.5,
        use_guassian: bool = True,  # (sic, spelling kept for compatibility with the original implementation)
        upscale_mode: Union[str, List[str]] = 'bicubic_latent',
        use_multidiffusion: bool = True,
        use_dilated_sampling: bool = True,
        use_skip_residual: bool = True,
        use_progressive_upscaling: bool = True,
        shuffle: bool = False,
        result_path: str = './outputs/AccDiffusion',
        debug: bool = False,
        use_md_prompt: bool = False,
        attn_res=None,
        save_attention_map: bool = False,
        seed: Optional[int] = None,
        c: Optional[float] = 0.3,
    ):
        r"""
        [function description omitted ...]
        """
        # (the body of the original __call__ implementation is kept as-is here.)
        # ... (omitted)
        output_images = []

        ################################### Phase Initialization ###################################
        # (omitted: the actual denoising and upscaling passes)
        # finally, save and return the generated images
        return output_images


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    ### AccDiffusion PARAMETERS ###
    parser.add_argument('--model_ckpt', default='stabilityai/stable-diffusion-xl-base-1.0')
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--prompt', default="Astronaut on Mars During sunset.")
    parser.add_argument('--negative_prompt', default="blurry, ugly, duplicate, poorly drawn, deformed, mosaic")
    parser.add_argument('--cosine_scale_1', default=3, type=float, help="cosine scale 1")
    parser.add_argument('--cosine_scale_2', default=1, type=float, help="cosine scale 2")
    parser.add_argument('--cosine_scale_3', default=1, type=float, help="cosine scale 3")
    parser.add_argument('--sigma', default=0.8, type=float, help="sigma")
    # NOTE: argparse's type=bool treats any non-empty string (including "False")
    # as True, so this flag is effectively always True unless given "".
    parser.add_argument('--multi_decoder', default=True, type=bool, help="multi decoder or not")
    parser.add_argument('--num_inference_steps', default=50, type=int, help="num inference steps")
    parser.add_argument('--resolution', default='1024,1024', help="target resolution")
    parser.add_argument('--use_multidiffusion', default=False, action='store_true', help="use multidiffusion or not")
    parser.add_argument('--use_guassian', default=False, action='store_true', help="use gaussian or not")
    parser.add_argument('--use_dilated_sampling', default=True, action='store_true', help="use dilated sampling or not")
    parser.add_argument('--use_progressive_upscaling', default=False, action='store_true', help="use progressive upscaling or not")
    parser.add_argument('--shuffle', default=False, action='store_true', help="shuffle or not")
    parser.add_argument('--use_skip_residual', default=False, action='store_true', help="use skip residual or not")
    parser.add_argument('--save_attention_map', default=False, action='store_true', help="save attention map or not")
    parser.add_argument('--multi_guidance_scale', default=7.5, type=float, help="multi guidance scale")
    parser.add_argument('--upscale_mode', default="bicubic_latent", help="bicubic_image or bicubic_latent")
    parser.add_argument('--use_md_prompt', default=False, action='store_true', help="use md prompt or not")
    parser.add_argument('--view_batch_size', default=16, type=int, help="view_batch_size")
    parser.add_argument('--stride', default=64, type=int, help="stride")
    parser.add_argument('--c', default=0.3, type=float, help="threshold")
    ## others ##
    parser.add_argument('--debug', default=False, action='store_true')
    parser.add_argument('--experiment_name', default="AccDiffusion")

    args = parser.parse_args()

    # Load the pipeline from the requested model checkpoint.
    pipe = AccDiffusionSDXLPipeline.from_pretrained(args.model_ckpt, torch_dtype=torch.float16).to("cuda")
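
    # Illustrative direct-call sketch (kept commented out so the script always
    # launches the Gradio demo): the resolution and flags below are assumptions
    # for demonstration, mirroring the arguments wired up in `infer` further down.
    #
    #   images = pipe(
    #       args.prompt,
    #       negative_prompt=args.negative_prompt,
    #       width=2048,
    #       height=2048,
    #       num_inference_steps=args.num_inference_steps,
    #       use_multidiffusion=True,
    #       use_dilated_sampling=True,
    #   )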

    # ----------------------- GRADIO INTERFACE (improved UI) -----------------------
    # CSS applied to the user interface (background, fonts, card styling, etc.)
    css = """
    body {
        background: linear-gradient(135deg, #2c3e50, #4ca1af);
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
        color: #ffffff;
    }
    #col-container {
        margin: 20px auto;
        padding: 20px;
        max-width: 900px;
        background-color: rgba(0, 0, 0, 0.5);
        border-radius: 12px;
        box-shadow: 0 4px 12px rgba(0,0,0,0.5);
    }
    h1, h2 {
        text-align: center;
        margin-bottom: 10px;
    }
    footer {
        visibility: hidden;
    }
    """

    # The Space banner says "Running on Zero" and `spaces` is imported above, so
    # a ZeroGPU allocation decorator is assumed here; the duration is an estimate.
    @spaces.GPU(duration=120)
    def infer(prompt, resolution, num_inference_steps, guidance_scale, seed,
              use_multidiffusion, use_skip_residual, use_dilated_sampling,
              use_progressive_upscaling, shuffle, use_md_prompt,
              progress=gr.Progress(track_tqdm=True)):
        seed = int(seed)  # gr.Slider delivers a float; seeding APIs need an int
        set_seed(seed)
        width, height = list(map(int, resolution.split(',')))
        cross_attention_kwargs = {
            "edit_type": "visualize",
            "n_self_replace": 0.4,
            "n_cross_replace": {"default_": 1.0, "confetti": 0.8},
        }
        generator = torch.Generator(device='cuda').manual_seed(seed)
        print(f"Prompt: {prompt}")
        md5_hash = hashlib.md5(prompt.encode()).hexdigest()
        result_path = f"./output/{args.experiment_name}/{md5_hash}/{width}_{height}_{seed}/"
        images = pipe(
            prompt,
            negative_prompt=args.negative_prompt,
            generator=generator,
            width=width,
            height=height,
            view_batch_size=args.view_batch_size,
            stride=args.stride,
            cross_attention_kwargs=cross_attention_kwargs,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            multi_guidance_scale=args.multi_guidance_scale,
            cosine_scale_1=args.cosine_scale_1,
            cosine_scale_2=args.cosine_scale_2,
            cosine_scale_3=args.cosine_scale_3,
            sigma=args.sigma,
            use_guassian=args.use_guassian,
            multi_decoder=args.multi_decoder,
            upscale_mode=args.upscale_mode,
            use_multidiffusion=use_multidiffusion,
            use_skip_residual=use_skip_residual,
            use_progressive_upscaling=use_progressive_upscaling,
            use_dilated_sampling=use_dilated_sampling,
            shuffle=shuffle,
            result_path=result_path,
            debug=args.debug,
            save_attention_map=args.save_attention_map,
            use_md_prompt=use_md_prompt,
            c=args.c,
        )
        print(images)
        return images

    MAX_SEED = np.iinfo(np.int32).max

    with gr.Blocks(css=css) as demo:
        with gr.Column(elem_id="col-container"):
            gr.Markdown("<h1>AccDiffusion: Advanced AI Art Generator</h1>")
            gr.Markdown(
                "Enter a creative prompt for the image you want to generate. This model applies the latest AccDiffusion techniques to produce artwork in a wide range of styles and resolutions."
            )
            with gr.Row():
                prompt = gr.Textbox(label="Prompt", placeholder="e.g. A surreal landscape with floating islands and vibrant colors.", lines=2, scale=4)
                submit_btn = gr.Button("Generate", scale=1)
            with gr.Accordion("Advanced Settings", open=False):
                with gr.Row():
                    resolution = gr.Radio(
                        label="Resolution",
                        choices=[
                            "1024,1024", "2048,2048", "2048,1024", "1536,3072", "3072,3072", "4096,4096", "4096,2048"
                        ],
                        value="1024,1024",
                        interactive=True
                    )
                with gr.Column():
                    num_inference_steps = gr.Slider(label="Inference Steps", minimum=2, maximum=50, step=1, value=30, info="Number of denoising steps")
                    guidance_scale = gr.Slider(label="Guidance Scale", minimum=1, maximum=10, step=0.1, value=7.5, info="Higher values increase adherence to the prompt")
                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, info="Set a seed for reproducibility")
                with gr.Row():
                    use_multidiffusion = gr.Checkbox(label="Use MultiDiffusion", value=True)
                    use_skip_residual = gr.Checkbox(label="Use Skip Residual", value=True)
                    use_dilated_sampling = gr.Checkbox(label="Use Dilated Sampling", value=True)
                with gr.Row():
                    use_progressive_upscaling = gr.Checkbox(label="Use Progressive Upscaling", value=False)
                    shuffle = gr.Checkbox(label="Shuffle", value=False)
                    use_md_prompt = gr.Checkbox(label="Use MD Prompt", value=False)
            # Gradio 4 removed `.style()`; grid/height are constructor arguments now.
            output_images = gr.Gallery(label="Output Images", format="png", columns=2, height="auto")
gr.Markdown("### Example Prompts") | |
gr.Examples( | |
examples=[ | |
["A surreal landscape with floating islands and vibrant colors."], | |
["Cyberpunk cityscape at night with neon lights and futuristic architecture."], | |
["A majestic dragon soaring over a medieval castle amidst stormy skies."], | |
["Futuristic robot exploring an alien planet with mysterious flora."], | |
["Abstract geometric patterns in vivid, pulsating colors."], | |
["A mystical forest illuminated by bioluminescent plants under a starry sky."] | |
], | |
inputs=[prompt], | |
label="Click an example to populate the prompt box." | |
) | |
submit_btn.click( | |
fn=infer, | |
inputs=[prompt, resolution, num_inference_steps, guidance_scale, seed, | |
use_multidiffusion, use_skip_residual, use_dilated_sampling, use_progressive_upscaling, shuffle, use_md_prompt], | |
outputs=[output_images], | |
show_api=False | |
) | |
demo.launch(show_api=False, show_error=True) | |