|
|
|
"""DreamBooth_Stable_Diffusion_V2.ipynb |
|
|
|
Automatically generated by Colaboratory. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/github/KaliYuga-ai/DreamBoothV2fork/blob/main/DreamBooth_Stable_Diffusion_V2.ipynb |
|
|
|
## DreamBooth with Stable Diffusion V2
|
|
|
This notebook is [KaliYuga](https://twitter.com/KaliYuga_ai)'s very basic fork of [Shivam Shrirao](https://github.com/ShivamShrirao)'s DreamBooth notebook. In addition to a few minor formatting and QoL additions, I've added Stable Diffusion V2 as the default training option and optimized the training settings to reflect what I've found to be the best general ones. They are only suggestions; feel free to tweak anything and everything if my defaults don't do it for you.
|
|
|
**I also [wrote a guide](https://peakd.com/hive-158694/@kaliyuga/training-a-dreambooth-model-using-stable-diffusion-v2-and-very-little-code)** that should take you through building a dataset and training a model using this notebook. If this is your first time creating a model from scratch, I recommend you check it out!
|
""" |
|
|
|
|
|
|
|
|
|
"""https://github.com/KaliYuga-ai/diffusers/tree/main/examples/dreambooth |
|
|
|
## Install Requirements |
|
""" |
|
|
|
|
|
!wget -q https://github.com/ShivamShrirao/diffusers/raw/main/examples/dreambooth/train_dreambooth.py |
|
!wget -q https://github.com/ShivamShrirao/diffusers/raw/main/scripts/convert_diffusers_to_original_stable_diffusion.py |
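
# The dependency-install cell appears to be missing from this export. A hedged
# sketch of the packages the rest of the notebook imports (versions unpinned;
# it assumes ShivamShrirao's diffusers branch, since the training script above
# is fetched from that repo):
!pip install -q git+https://github.com/ShivamShrirao/diffusers
!pip install -q accelerate transformers ftfy bitsandbytes gradio natsort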
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Paste your own Hugging Face access token below (create one at https://huggingface.co/settings/tokens);
# never leave a real token in a shared notebook.
!mkdir -p ~/.huggingface
HUGGINGFACE_TOKEN = "YOUR_HUGGINGFACE_TOKEN"
!echo -n "{HUGGINGFACE_TOKEN}" > ~/.huggingface/token
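
# Alternative login (a minimal sketch, assuming the `huggingface_hub` package
# bundled with `diffusers` is available): `notebook_login()` opens an interactive
# widget and stores the token for you, so nothing sensitive is hard-coded.
USE_NOTEBOOK_LOGIN = False  # hypothetical toggle, not part of the original notebook
if USE_NOTEBOOK_LOGIN:
    from huggingface_hub import notebook_login
    notebook_login()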
|
|
|
"""### Install xformers from precompiled wheel.""" |
|
|
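# The cell that installed the precompiled xformers wheel is missing from this
# export. A minimal sketch, assuming the prebuilt xformers wheel on PyPI is
# compatible with this Colab runtime (the original cell may have pinned a
# specific wheel build instead):
!pip install -q xformers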
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""## Settings and run""" |
|
|
|
|
|
save_to_gdrive = True |
|
if save_to_gdrive: |
|
from google.colab import drive |
|
drive.mount('/content/drive') |
|
|
|
|
|
MODEL_NAME = "stabilityai/stable-diffusion-2" |
|
|
|
|
|
|
|
OUTPUT_DIR = "stable_diffusion_weights/ps1theme" |
|
if save_to_gdrive: |
|
OUTPUT_DIR = "/content/drive/MyDrive/" + OUTPUT_DIR |
|
else: |
|
OUTPUT_DIR = "/content/" + OUTPUT_DIR |
|
|
|
print(f"[*] Weights will be saved at {OUTPUT_DIR}") |
|
|
|
!mkdir -p $OUTPUT_DIR |
|
|
|
"""### Start Training |
|
|
|
Use the table below to choose the best flags based on your memory and speed requirements. Tested on a Tesla T4 GPU.
|
|
|
|
|
| `fp16` | `train_batch_size` | `gradient_accumulation_steps` | `gradient_checkpointing` | `use_8bit_adam` | VRAM usage (GB) | Speed (it/s) |

| ---- | ------------------ | ----------------------------- | ----------------------- | --------------- | --------------- | ------------ |
|
| fp16 | 1 | 1 | TRUE | TRUE | 9.92 | 0.93 | |
|
| no | 1 | 1 | TRUE | TRUE | 10.08 | 0.42 | |
|
| fp16 | 2 | 1 | TRUE | TRUE | 10.4 | 0.66 | |
|
| fp16 | 1 | 1 | FALSE | TRUE | 11.17 | 1.14 | |
|
| no | 1 | 1 | FALSE | TRUE | 11.17 | 0.49 | |
|
| fp16 | 1 | 2 | TRUE | TRUE | 11.56 | 1 | |
|
| fp16 | 2 | 1 | FALSE | TRUE | 13.67 | 0.82 | |
|
| fp16 | 1 | 2 | FALSE | TRUE | 13.7 | 0.83 | |
|
| fp16 | 1 | 1 | TRUE | FALSE | 15.79 | 0.77 | |
|
------------------------------------------------------------------------------ |
|
|
|
|
|
- The `--gradient_checkpointing` flag is enabled by default; it reduces VRAM usage to 9.92 GB.

- Remove the `--use_8bit_adam` flag to use the full-precision optimizer. This requires 15.79 GB of VRAM with `--gradient_checkpointing`, or 17.8 GB without it.

- Remove the `--train_text_encoder` flag to reduce memory usage further, but this degrades output quality. NOT RECOMMENDED.
|
|
|
### Define Your Concepts List |
|
You can add multiple concepts here. Try tweaking `--max_train_steps` accordingly. |
|
It's a good idea to test class prompts in Stable Diffusion V2 before committing to them (a quick way to do this is sketched just below the concepts list). If the images V2 generates at a CFG of 7 and 50 steps aren't great, consider a different class prompt.
|
""" |
|
|
|
concepts_list = [ |
|
{ |
|
"instance_prompt": "ps1theme", |
|
"class_prompt": "women", |
|
"instance_data_dir": "/content/drive/MyDrive/ps1theme", |
|
"class_data_dir": "/content/data/women" |
|
    },
]
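
# A minimal sketch (not part of the original notebook) for sanity-checking the
# class prompt against base Stable Diffusion V2 before training, at a CFG of 7
# and 50 steps as suggested above.
import torch
from diffusers import StableDiffusionPipeline
from IPython.display import display

test_pipe = StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2", torch_dtype=torch.float16
).to("cuda")
for im in test_pipe(
    concepts_list[0]["class_prompt"],  # the class prompt defined above
    guidance_scale=7.0,                # CFG of 7
    num_inference_steps=50,
    num_images_per_prompt=4,
).images:
    display(im)

# Free the test pipeline so it doesn't hold VRAM during training.
del test_pipe
torch.cuda.empty_cache()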
|
|
|
|
|
import json |
|
import os |
|
for c in concepts_list: |
|
os.makedirs(c["instance_data_dir"], exist_ok=True) |
|
|
|
with open("concepts_list.json", "w") as f: |
|
json.dump(concepts_list, f, indent=4) |
|
|
|
"""### Image Upload""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
from google.colab import files |
|
import shutil |
|
|
|
for c in concepts_list: |
|
print(f"Uploading instance images for `{c['instance_prompt']}`") |
|
uploaded = files.upload() |
|
for filename in uploaded.keys(): |
|
dst_path = os.path.join(c['instance_data_dir'], filename) |
|
shutil.move(filename, dst_path) |
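
# A small sanity check (not in the original notebook): list how many files ended
# up in each instance_data_dir so an empty or half-uploaded dataset is caught
# before training starts.
for c in concepts_list:
    n_files = len(os.listdir(c["instance_data_dir"]))
    print(f"`{c['instance_prompt']}`: {n_files} file(s) in {c['instance_data_dir']}")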
|
|
|
"""### Training Settings |
|
The learning rate in this notebook has been increased from the default used in previous DreamBooth notebooks, since training runs more slowly on SD V2. It might not be the best LR for every use case, but it has worked well on all the datasets I (KaliYuga) have tried so far.

Please note that `gradient_checkpointing` is enabled by default; I think it produces better results, and it reduces VRAM usage.
|
""" |
|
|
|
!accelerate launch train_dreambooth.py \ |
|
--pretrained_model_name_or_path=$MODEL_NAME \ |
|
--pretrained_vae_name_or_path="stabilityai/sd-vae-ft-mse" \ |
|
--output_dir=$OUTPUT_DIR \ |
|
--revision="fp16" \ |
|
--with_prior_preservation --prior_loss_weight=1.0 \ |
|
--seed=1337 \ |
|
--resolution=512 \ |
|
--train_batch_size=1 \ |
|
--train_text_encoder \ |
|
--mixed_precision="fp16" \ |
|
--use_8bit_adam \ |
|
--gradient_accumulation_steps=1 \ |
|
--gradient_checkpointing \ |
|
--learning_rate=4e-6 \ |
|
--lr_scheduler="constant" \ |
|
--lr_warmup_steps=0 \ |
|
--num_class_images=50 \ |
|
--sample_batch_size=4 \ |
|
--max_train_steps=5000 \ |
|
--save_interval=500 \ |
|
--save_sample_prompt="ps1theme" \ |
|
--concepts_list="concepts_list.json" |
|
|
|
|
|
|
|
|
|
"""### Testing your new model |
|
|
|
Once your model has finished training (or has reached a checkpoint you like), run the following cells to test it out. |
|
""" |
|
|
|
|
|
# Set WEIGHTS_DIR to a specific checkpoint folder, or leave it as "" to automatically select the most recent checkpoint.
WEIGHTS_DIR = "/content/drive/MyDrive/stable_diffusion_weights/ps1theme/2000"
|
if WEIGHTS_DIR == "": |
|
from natsort import natsorted |
|
from glob import glob |
|
import os |
|
WEIGHTS_DIR = natsorted(glob(OUTPUT_DIR + os.sep + "*"))[-1] |
|
print(f"[*] WEIGHTS_DIR={WEIGHTS_DIR}") |
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import matplotlib.pyplot as plt |
|
import matplotlib.image as mpimg |
|
|
|
weights_folder = OUTPUT_DIR |
|
folders = sorted([f for f in os.listdir(weights_folder) if f != "0"], key=lambda x: int(x)) |
|
|
|
row = len(folders) |
|
col = len(os.listdir(os.path.join(weights_folder, folders[0], "samples"))) |
|
scale = 4 |
|
fig, axes = plt.subplots(row, col, figsize=(col*scale, row*scale), gridspec_kw={'hspace': 0, 'wspace': 0}) |
|
|
|
for i, folder in enumerate(folders): |
|
folder_path = os.path.join(weights_folder, folder) |
|
image_folder = os.path.join(folder_path, "samples") |
|
images = [f for f in os.listdir(image_folder)] |
|
for j, image in enumerate(images): |
|
if row == 1: |
|
currAxes = axes[j] |
|
else: |
|
currAxes = axes[i, j] |
|
if i == 0: |
|
currAxes.set_title(f"Image {j}") |
|
if j == 0: |
|
currAxes.text(-0.1, 0.5, folder, rotation=0, va='center', ha='center', transform=currAxes.transAxes) |
|
image_path = os.path.join(image_folder, image) |
|
img = mpimg.imread(image_path) |
|
currAxes.imshow(img, cmap='gray') |
|
currAxes.axis('off') |
|
|
|
plt.tight_layout() |
|
plt.savefig('grid.png', dpi=72) |
|
|
|
"""#### Convert weights to ckpt to use in web UIs like AUTOMATIC1111.""" |
|
|
|
|
|
ckpt_path = WEIGHTS_DIR + "/model.ckpt" |
|
|
|
half_arg = "" |
|
|
|
fp16 = True |
|
if fp16: |
|
half_arg = "--half" |
|
!python convert_diffusers_to_original_stable_diffusion.py --model_path $WEIGHTS_DIR --checkpoint_path $ckpt_path $half_arg |
|
print(f"[*] Converted ckpt saved at {ckpt_path}") |
|
|
|
"""#### Inference""" |
|
|
|
|
|
|
import torch |
|
from torch import autocast |
|
from diffusers import StableDiffusionPipeline, DDIMScheduler |
|
from IPython.display import display |
|
|
|
model_path = WEIGHTS_DIR |
|
|
|
scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False) |
|
pipe = StableDiffusionPipeline.from_pretrained(model_path, scheduler=scheduler, safety_checker=None, torch_dtype=torch.float16).to("cuda") |
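
# Optional (a hedged sketch, assuming the xformers wheel installed earlier loads
# in this runtime): memory-efficient attention lowers inference VRAM usage.
try:
    pipe.enable_xformers_memory_efficient_attention()
except Exception as e:
    print(f"xformers unavailable, continuing without it: {e}")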
|
|
|
g_cuda = None |
|
|
|
|
|
g_cuda = torch.Generator(device='cuda') |
|
seed = 47853 |
|
g_cuda.manual_seed(seed) |
|
|
|
|
|
|
|
prompt = "ps1theme" |
|
negative_prompt = "ugly, cartoon, 3d, ((disfigured)), ((bad art)), ((deformed)), ((poorly drawn)), ((blurry))"
|
num_samples = 4 |
|
guidance_scale = 7.5 |
|
num_inference_steps = 50 |
|
height = 512 |
|
width = 512 |
|
|
|
with autocast("cuda"), torch.inference_mode(): |
|
images = pipe( |
|
prompt, |
|
height=height, |
|
width=width, |
|
negative_prompt=negative_prompt, |
|
num_images_per_prompt=num_samples, |
|
num_inference_steps=num_inference_steps, |
|
guidance_scale=guidance_scale, |
|
generator=g_cuda |
|
).images |
|
|
|
for img in images: |
|
display(img) |
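
# Optional addition (not in the original notebook): save the generated samples
# next to the weights so they survive the Colab session. The folder name
# "inference_samples" is arbitrary.
sample_dir = os.path.join(OUTPUT_DIR, "inference_samples")
os.makedirs(sample_dir, exist_ok=True)
for idx, img in enumerate(images):
    img.save(os.path.join(sample_dir, f"sample_{seed}_{idx}.png"))
print(f"[*] Saved {len(images)} image(s) to {sample_dir}")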
|
|
|
|
|
import gradio as gr |
|
|
|
def inference(prompt, negative_prompt, num_samples, height=512, width=512, num_inference_steps=50, guidance_scale=7.5): |
|
with torch.autocast("cuda"), torch.inference_mode(): |
|
return pipe( |
|
prompt, height=int(height), width=int(width), |
|
negative_prompt=negative_prompt, |
|
num_images_per_prompt=int(num_samples), |
|
num_inference_steps=int(num_inference_steps), guidance_scale=guidance_scale, |
|
generator=g_cuda |
|
).images |
|
|
|
with gr.Blocks() as demo: |
|
with gr.Row(): |
|
with gr.Column(): |
|
            prompt = gr.Textbox(label="Prompt", value="ps1theme")
|
negative_prompt = gr.Textbox(label="Negative Prompt", value="") |
|
run = gr.Button(value="Generate") |
|
with gr.Row(): |
|
num_samples = gr.Number(label="Number of Samples", value=4) |
|
guidance_scale = gr.Number(label="Guidance Scale", value=7.5) |
|
with gr.Row(): |
|
height = gr.Number(label="Height", value=512) |
|
width = gr.Number(label="Width", value=512) |
|
num_inference_steps = gr.Slider(label="Steps", value=50) |
|
with gr.Column(): |
|
gallery = gr.Gallery() |
|
|
|
run.click(inference, inputs=[prompt, negative_prompt, num_samples, height, width, num_inference_steps, guidance_scale], outputs=gallery) |
|
|
|
demo.launch(debug=True, share=True)