# -*- coding: utf-8 -*-
"""DreamBooth_Stable_Diffusion_V2.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/github/KaliYuga-ai/DreamBoothV2fork/blob/main/DreamBooth_Stable_Diffusion_V2.ipynb
##DreamBooth with Stable Diffusion V2
This notebook is [KaliYuga](https://twitter.com/KaliYuga_ai)'s very basic fork of [Shivam Shrirao](https://github.com/ShivamShrirao)'s DreamBooth notebook. In addition to a few minor formatting and QoL additions, I've added Stable Diffusion V2 as the default training option and optimized the training settings to reflect what I've found to be the best general ones. They are only suggestions; feel free to tweak anything and everything if my defaults don't do it for you.
**I also [wrote a guide](https://peakd.com/hive-158694/@kaliyuga/training-a-dreambooth-model-using-stable-diffusion-v2-and-very-little-code)** that should take you through building a dataset and training a model using this notebook. If this is your first time creating a model from scratch, I recommend you check it out!
"""
#@markdown Check type of GPU and VRAM available.
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader
"""https://github.com/KaliYuga-ai/diffusers/tree/main/examples/dreambooth
## Install Requirements
"""
# Commented out IPython magic to ensure Python compatibility.
!wget -q https://github.com/ShivamShrirao/diffusers/raw/main/examples/dreambooth/train_dreambooth.py
!wget -q https://github.com/ShivamShrirao/diffusers/raw/main/scripts/convert_diffusers_to_original_stable_diffusion.py
# %pip install -qq git+https://github.com/ShivamShrirao/diffusers
# %pip install -q -U --pre triton
# %pip install -q accelerate==0.12.0 transformers ftfy bitsandbytes gradio natsort
#@title Login to HuggingFace 🤗
#@markdown You need to accept the model license before downloading or using the Stable Diffusion weights. Please visit the [model card](https://huggingface.co/stabilityai/stable-diffusion-2), read the license, and tick the checkbox if you agree. You need to be a registered user on the 🤗 Hugging Face Hub, and you'll also need an access token for the code to work.
# https://huggingface.co/settings/tokens
!mkdir -p ~/.huggingface
#@markdown Paste your Hugging Face access token below (don't leave a real token filled in if you share the notebook).
HUGGINGFACE_TOKEN = "" #@param {type:"string"}
!echo -n "{HUGGINGFACE_TOKEN}" > ~/.huggingface/token
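# An optional alternative (not part of the original notebook): the huggingface_hub package,
# which is installed alongside diffusers/transformers, provides an interactive login helper
# that stores the same token for you. Uncomment to use it instead of the cell above.
# from huggingface_hub import notebook_login
# notebook_login()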
"""### Install xformers from precompiled wheel."""
# Commented out IPython magic to ensure Python compatibility.
# %pip install -q https://github.com/metrolobo/xformers_wheels/releases/download/1d31a3ac_various_6/xformers-0.0.14.dev0-cp37-cp37m-linux_x86_64.whl
# These were compiled on Tesla T4, should also work on P100, thanks to https://github.com/metrolobo
# If precompiled wheels don't work, install it with the following command. It will take around 40 minutes to compile.
# %pip install git+https://github.com/facebookresearch/xformers@1d31a3a#egg=xformers
"""## Settings and run"""
#@markdown Whether the model weights should be saved directly to Google Drive (takes around 4-5 GB).
save_to_gdrive = True #@param {type:"boolean"}
if save_to_gdrive:
    from google.colab import drive
    drive.mount('/content/drive')
#@markdown Name/Path of the initial model.
MODEL_NAME = "stabilityai/stable-diffusion-2" #@param {type:"string"}
#@markdown Enter the directory name to save the model in.
OUTPUT_DIR = "stable_diffusion_weights/ps1theme" #@param {type:"string"}
if save_to_gdrive:
OUTPUT_DIR = "/content/drive/MyDrive/" + OUTPUT_DIR
else:
OUTPUT_DIR = "/content/" + OUTPUT_DIR
print(f"[*] Weights will be saved at {OUTPUT_DIR}")
!mkdir -p $OUTPUT_DIR
"""### Start Training
Use the table below to choose the best flags based on your memory and speed requirements. Tested on Tesla T4 GPU.
| `fp16` | `train_batch_size` | `gradient_accumulation_steps` | `gradient_checkpointing` | `use_8bit_adam` | GB VRAM usage | Speed (it/s) |
| ---- | ------------------ | ----------------------------- | ----------------------- | --------------- | ---------- | ------------ |
| fp16 | 1 | 1 | TRUE | TRUE | 9.92 | 0.93 |
| no | 1 | 1 | TRUE | TRUE | 10.08 | 0.42 |
| fp16 | 2 | 1 | TRUE | TRUE | 10.4 | 0.66 |
| fp16 | 1 | 1 | FALSE | TRUE | 11.17 | 1.14 |
| no | 1 | 1 | FALSE | TRUE | 11.17 | 0.49 |
| fp16 | 1 | 2 | TRUE | TRUE | 11.56 | 1 |
| fp16 | 2 | 1 | FALSE | TRUE | 13.67 | 0.82 |
| fp16 | 1 | 2 | FALSE | TRUE | 13.7 | 0.83 |
| fp16 | 1 | 1 | TRUE | FALSE | 15.79 | 0.77 |
------------------------------------------------------------------------------
- The `--gradient_checkpointing` flag is enabled by default; it reduces VRAM usage to 9.92 GB.
- Remove the `--use_8bit_adam` flag to use the full-precision optimizer. This requires 15.79 GB with `--gradient_checkpointing`, else 17.8 GB.
- Remove the `--train_text_encoder` flag to reduce memory usage further, at the cost of output quality. NOT RECOMMENDED.
### Define Your Concepts List
You can add multiple concepts here. Try tweaking `--max_train_steps` accordingly.
It's a good idea to test class prompts in Stable Diffusion V2 before committing to them. If the images V2 generates at a CFG of 7 and 50 steps aren't great, consider a different class prompt.
"""
concepts_list = [
{
"instance_prompt": "ps1theme",
"class_prompt": "women",
"instance_data_dir": "/content/drive/MyDrive/ps1theme",
"class_data_dir": "/content/data/women"
},
# {
# "instance_prompt": "photo of zwx dog",
# "class_prompt": "photo of a dog",
# "instance_data_dir": "/content/data/zwx",
# "class_data_dir": "/content/data/dog"
# },
# {
# "instance_prompt": "photo of ukj person",
# "class_prompt": "photo of a person",
# "instance_data_dir": "/content/data/ukj",
# "class_data_dir": "/content/data/person"
# }
]
# `class_data_dir` contains regularization images
import json
import os
for c in concepts_list:
    os.makedirs(c["instance_data_dir"], exist_ok=True)

with open("concepts_list.json", "w") as f:
    json.dump(concepts_list, f, indent=4)
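"""#### Optional: quick class-prompt check
Before committing to a class prompt, you can render a few images with the base V2 model at a CFG of 7 and 50 steps and see whether they look reasonable. This cell is an added sketch, not part of the original notebook; it assumes the diffusers install from the cells above and a GPU runtime, and it frees the pipeline afterwards so training VRAM isn't affected.
"""
import torch
from diffusers import StableDiffusionPipeline
from IPython.display import display

# Load the base (untrained) model and generate a small test batch for the first class prompt.
test_pipe = StableDiffusionPipeline.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float16
).to("cuda")
test_images = test_pipe(
    concepts_list[0]["class_prompt"],
    guidance_scale=7,
    num_inference_steps=50,
    num_images_per_prompt=4,
).images
for im in test_images:
    display(im)

# Free VRAM again before training.
del test_pipe
torch.cuda.empty_cache()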
"""### Image Upload"""
#@markdown Upload your images by running this cell.
#@markdown OR
#@markdown Add your dataset to Google Drive and copy its path into `instance_data_dir` above. You can also use the file manager on the left panel to upload (drag and drop) images into each `instance_data_dir`; that is faster than running this cell.
import os
from google.colab import files
import shutil
for c in concepts_list:
print(f"Uploading instance images for `{c['instance_prompt']}`")
uploaded = files.upload()
for filename in uploaded.keys():
dst_path = os.path.join(c['instance_data_dir'], filename)
shutil.move(filename, dst_path)
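#@markdown (Optional) Sanity-check the uploaded instance images. This cell is an added sketch, not part of the original notebook; it only uses Pillow, which Colab ships with. Training runs at `--resolution=512`, so images much smaller than that are worth flagging.
from PIL import Image

for c in concepts_list:
    for fname in sorted(os.listdir(c["instance_data_dir"])):
        path = os.path.join(c["instance_data_dir"], fname)
        try:
            with Image.open(path) as im:
                w, h = im.size
        except OSError:
            print(f"[!] {path} could not be opened as an image")
            continue
        if min(w, h) < 512:
            print(f"[!] {path} is {w}x{h}; smaller than the 512px training resolution")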
"""### Training Settings
The learning rate in this notebook is higher than the default in previous DreamBooth notebooks because training runs more slowly on SD V2. It might not be the best LR for every use case, but it has done well on all the datasets I (KaliYuga) have tried so far.
Please note, `gradient_checkpointing` is enabled by default. I think it produces better results, and it reduces VRAM usage.
"""
!accelerate launch train_dreambooth.py \
--pretrained_model_name_or_path=$MODEL_NAME \
--pretrained_vae_name_or_path="stabilityai/sd-vae-ft-mse" \
--output_dir=$OUTPUT_DIR \
--revision="fp16" \
--with_prior_preservation --prior_loss_weight=1.0 \
--seed=1337 \
--resolution=512 \
--train_batch_size=1 \
--train_text_encoder \
--mixed_precision="fp16" \
--use_8bit_adam \
--gradient_accumulation_steps=1 \
--gradient_checkpointing \
--learning_rate=4e-6 \
--lr_scheduler="constant" \
--lr_warmup_steps=0 \
--num_class_images=50 \
--sample_batch_size=4 \
--max_train_steps=5000 \
--save_interval=500 \
--save_sample_prompt="ps1theme" \
--concepts_list="concepts_list.json"
# Set `--save_interval` lower than `--max_train_steps` to save weights at intermediate steps.
# `--save_sample_prompt` can be the same as `--instance_prompt` to generate intermediate samples (saved along with the weights in the samples directory).
"""### Testing your new model
Once your model has finished training (or has reached a checkpoint you like), run the following cells to test it out.
"""
#@markdown Specify the weights directory to use (leave blank for latest)
WEIGHTS_DIR = "/content/drive/MyDrive/stable_diffusion_weights/ps1theme/2000" #@param {type:"string"}
if WEIGHTS_DIR == "":
    from natsort import natsorted
    from glob import glob
    import os
    WEIGHTS_DIR = natsorted(glob(OUTPUT_DIR + os.sep + "*"))[-1]
print(f"[*] WEIGHTS_DIR={WEIGHTS_DIR}")
# Remount Google Drive in case you are resuming testing in a fresh session.
from google.colab import drive
drive.mount('/content/drive')
#@markdown Run to generate a grid of preview images from the last saved weights.
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
weights_folder = OUTPUT_DIR
folders = sorted([f for f in os.listdir(weights_folder) if f != "0"], key=lambda x: int(x))
row = len(folders)
col = len(os.listdir(os.path.join(weights_folder, folders[0], "samples")))
scale = 4
fig, axes = plt.subplots(row, col, figsize=(col*scale, row*scale), gridspec_kw={'hspace': 0, 'wspace': 0})
for i, folder in enumerate(folders):
    folder_path = os.path.join(weights_folder, folder)
    image_folder = os.path.join(folder_path, "samples")
    images = [f for f in os.listdir(image_folder)]
    for j, image in enumerate(images):
        if row == 1:
            currAxes = axes[j]
        else:
            currAxes = axes[i, j]
        if i == 0:
            currAxes.set_title(f"Image {j}")
        if j == 0:
            currAxes.text(-0.1, 0.5, folder, rotation=0, va='center', ha='center', transform=currAxes.transAxes)
        image_path = os.path.join(image_folder, image)
        img = mpimg.imread(image_path)
        currAxes.imshow(img, cmap='gray')
        currAxes.axis('off')
plt.tight_layout()
plt.savefig('grid.png', dpi=72)
"""#### Convert weights to ckpt to use in web UIs like AUTOMATIC1111."""
#@markdown Run conversion.
ckpt_path = WEIGHTS_DIR + "/model.ckpt"
half_arg = ""
#@markdown Whether to convert to fp16, takes half the space (2GB).
fp16 = True #@param {type: "boolean"}
if fp16:
    half_arg = "--half"
!python convert_diffusers_to_original_stable_diffusion.py --model_path $WEIGHTS_DIR --checkpoint_path $ckpt_path $half_arg
print(f"[*] Converted ckpt saved at {ckpt_path}")
"""#### Inference"""
from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer
import torch
from torch import autocast
from diffusers import StableDiffusionPipeline, DDIMScheduler
from IPython.display import display
model_path = WEIGHTS_DIR # If you want to use previously trained model saved in gdrive, replace this with the full path of model in gdrive
scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False)
pipe = StableDiffusionPipeline.from_pretrained(model_path, scheduler=scheduler, safety_checker=None, torch_dtype=torch.float16).to("cuda")
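# Optional addition (not in the original notebook): if the generation cell below runs out of
# CUDA memory, attention slicing lowers the memory peak at a small speed cost.
# pipe.enable_attention_slicing()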
g_cuda = None
#@markdown Can set random seed here for reproducibility.
g_cuda = torch.Generator(device='cuda')
seed = 47853 #@param {type:"number"}
g_cuda.manual_seed(seed)
#@title ##Run for generating images.
prompt = "ps1theme" #@param {type:"string"}
negative_prompt = "ugly, cartoon, 3d, ((disfigured)), ((bad art)), ((deformed)), ((poorly drawn)), ((blurry))" #@param {type:"string"}
num_samples = 4 #@param {type:"number"}
guidance_scale = 7.5 #@param {type:"number"}
num_inference_steps = 50 #@param {type:"number"}
height = 512 #@param {type:"number"}
width = 512 #@param {type:"number"}
with autocast("cuda"), torch.inference_mode():
    images = pipe(
        prompt,
        height=height,
        width=width,
        negative_prompt=negative_prompt,
        num_images_per_prompt=num_samples,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=g_cuda
    ).images

for img in images:
    display(img)
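#@markdown (Optional) Save the generated images to disk. This cell is an added sketch, not part of the original notebook; the output folder is just an example path.
import os

sample_dir = "/content/generated_samples"  # example location; change as needed
os.makedirs(sample_dir, exist_ok=True)
for idx, img in enumerate(images):
    img.save(os.path.join(sample_dir, f"seed{seed}_{idx}.png"))
print(f"[*] Saved {len(images)} images to {sample_dir}")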
#@markdown Run Gradio UI for generating images.
import gradio as gr
def inference(prompt, negative_prompt, num_samples, height=512, width=512, num_inference_steps=50, guidance_scale=7.5):
    with torch.autocast("cuda"), torch.inference_mode():
        return pipe(
            prompt, height=int(height), width=int(width),
            negative_prompt=negative_prompt,
            num_images_per_prompt=int(num_samples),
            num_inference_steps=int(num_inference_steps), guidance_scale=guidance_scale,
            generator=g_cuda
        ).images
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt", value="photo of zwx dog in a bucket")
            negative_prompt = gr.Textbox(label="Negative Prompt", value="")
            run = gr.Button(value="Generate")
            with gr.Row():
                num_samples = gr.Number(label="Number of Samples", value=4)
                guidance_scale = gr.Number(label="Guidance Scale", value=7.5)
            with gr.Row():
                height = gr.Number(label="Height", value=512)
                width = gr.Number(label="Width", value=512)
            num_inference_steps = gr.Slider(label="Steps", value=50)
        with gr.Column():
            gallery = gr.Gallery()

    run.click(inference, inputs=[prompt, negative_prompt, num_samples, height, width, num_inference_steps, guidance_scale], outputs=gallery)

demo.launch(debug=True, share=True)