# Note, Flex2 is a highly experimental WIP model. Finetuning a model with built-in controls and inpainting has not
# been done before, so you will be experimenting with me on how to do it. This is my recommended setup, but it is
# highly subject to change as we learn more about how Flex2 works.
---
job: extension
config:
# this name will be used for the output folder and file names
name: "my_first_flex2_lora_v1"
process:
- type: 'sd_trainer'
# root folder to save training sessions/samples/weights
training_folder: "output"
# uncomment to see performance stats in the terminal every N steps
# performance_log_every: 1000
device: cuda:0
# if a trigger word is specified, it will be added to captions of training data if it does not already exist
# alternatively, in your captions you can add [trigger] and it will be replaced with the trigger word
# trigger_word: "p3r5on"
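# for example (hypothetical caption, assuming the trigger_word "p3r5on" above is uncommented),
# a caption file containing
#   "photo of [trigger] wearing a red jacket"
# would be trained as "photo of p3r5on wearing a red jacket"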
network:
type: "lora"
linear: 32
linear_alpha: 32
save:
dtype: float16 # precision to save
save_every: 250 # save every this many steps
max_step_saves_to_keep: 4 # how many intermittent saves to keep
push_to_hub: false # change this to true to push your trained model to Hugging Face.
# You can either set up a HF_TOKEN env variable or you'll be prompted to log-in
# hf_repo_id: your-username/your-model-slug
# hf_private: true #whether the repo is private or public
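# for example (hypothetical repo id), pushing to a private repo might look like:
#   push_to_hub: true
#   hf_repo_id: "your-username/my_first_flex2_lora_v1"
#   hf_private: true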
datasets:
# datasets are a folder of images. captions need to be txt files with the same name as the image
# for instance image2.jpg and image2.txt. Only jpg, jpeg, and png are supported currently
# images will automatically be resized and bucketed into the resolution specified
# on windows, escape back slashes with another backslash so
# "C:\\path\\to\\images\\folder"
- folder_path: "/path/to/images/folder"
# Flex2 is trained with controls and inpainting. If you want the model to truly understand how the
# controls function with your dataset, it is a good idea to keep doing controls during training.
# this will automatically generate the controls for you before training. The current script is not
# fully optimized so this could be rather slow for large datasets, but it caches them to disk so it
# only needs to be done once. If you want to skip this step, you can set controls to [] and control
# generation will be skipped.
controls:
- "depth"
- "line"
- "pose"
- "inpaint"
# you can make custom inpainting images as well. These images must be webp or png format with an alpha channel.
# just erase the part of the image you want to inpaint and save it as a webp or png. Again, erase your
# training target (e.g. the person, if you are training a person). The automatic controls above with inpaint will
# just run a background remover mask and erase the foreground, which works well for subjects.
# inpaint_path: "/my/inpaint/images"
# you can also specify existing control image pairs. It can handle multiple groups and will randomly
# select one for each step.
# control_path:
# - "/my/custom/control/images"
# - "/my/custom/control/images2"
caption_ext: "txt"
caption_dropout_rate: 0.05 # will drop out the caption 5% of time
resolution: [ 512, 768, 1024 ] # flex2 enjoys multiple resolutions
train:
batch_size: 1
# IMPORTANT! For Flex2, you must bypass the guidance embedder during training
bypass_guidance_embedding: true
steps: 3000 # total number of steps to train; 500 - 4000 is a good range
gradient_accumulation: 1
train_unet: true
train_text_encoder: false # probably won't work with flex2
gradient_checkpointing: true # need this on unless you have a ton of vram
noise_scheduler: "flowmatch" # for training only
# shift works well for training fast and learning composition and style.
# for just subject, you may want to change this to sigmoid
timestep_type: 'shift' # 'linear', 'sigmoid', 'shift'
optimizer: "adamw8bit"
lr: 1e-4
optimizer_params:
weight_decay: 1e-5
# uncomment this to skip the pre training sample
# skip_first_sample: true
# uncomment to completely disable sampling
# disable_sampling: true
# uncomment to use new bell curved weighting. Experimental but may produce better results
# linear_timesteps: true
# ema will smooth out learning, but could slow it down. Defaults off
ema_config:
use_ema: false
ema_decay: 0.99
# you will probably need bf16 if your gpu supports it for flex; other dtypes may not work correctly
dtype: bf16
model:
# huggingface model name or path
name_or_path: "ostris/Flex.2-preview"
arch: "flex2"
quantize: true # run 8bit mixed precision
quantize_te: true
# you can pass special training info for controls to the model here
# percentages are decimal based so 0.0 is 0% and 1.0 is 100% of the time.
model_kwargs:
# inverts the inpainting mask, good to learn outpainting as well, recommended 0.0 for characters
invert_inpaint_mask_chance: 0.5
# this will do a normal t2i training step without inpaint when dropped out. Recommended if you want
# your lora to be able to inference with and without inpainting.
inpaint_dropout: 0.5
# randomly drops out the control image. Dropout recommended if you want it to work without controls as well.
control_dropout: 0.5
# does a random inpaint blob. Usually a good idea to keep. Without it, the model will learn to always 100%
# fill the inpaint area with your subject. This is not always a good thing.
inpaint_random_chance: 0.5
# generates random inpaint blobs if you did not provide an inpaint image for your dataset. Inpaint breaks down fast
# if you are not training with it. Controls are a little more robust and can be left out,
# but when in doubt, always leave this on
do_random_inpainting: false
# does random blurring of the inpaint mask. Helps prevent weird edge artifacts for real world inpainting. Leave on.
random_blur_mask: true
# applies a small amount of random dilation and restriction to the inpaint mask. Helps with edge artifacts.
# Leave on.
random_dialate_mask: true
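# a rough sketch (not a tested recipe) of how the kwargs above might look for a character lora,
# following the note on invert_inpaint_mask_chance being recommended at 0.0 for characters:
#   invert_inpaint_mask_chance: 0.0
#   inpaint_dropout: 0.5
#   control_dropout: 0.5
#   inpaint_random_chance: 0.5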
sample:
sampler: "flowmatch" # must match train.noise_scheduler
sample_every: 250 # sample every this many steps
width: 1024
height: 1024
prompts:
# you can add [trigger] to the prompts here and it will be replaced with the trigger word
# - "[trigger] holding a sign that says 'I LOVE PROMPTS!'"\
# you can use a single inpaint or single control image on your samples.
# for controls, the ctrl_idx is 1, the images can be any name and image format.
# use either a pose/line/depth image or whatever you are training with. An example is
# - "photo of [trigger] --ctrl_idx 1 --ctrl_img /path/to/control/image.jpg"
# for an inpainting image, it must be png/webp. Erase the part of the image you want to inpaint
# IMPORTANT! the inpaint images must be ctrl_idx 0 and have .inpaint.{ext} in the name for this to work right.
# - "photo of [trigger] --ctrl_idx 0 --ctrl_img /path/to/inpaint/image.inpaint.png"
- "woman with red hair, playing chess at the park, bomb going off in the background"
- "a woman holding a coffee cup, in a beanie, sitting at a cafe"
- "a horse is a DJ at a night club, fish eye lens, smoke machine, lazer lights, holding a martini"
- "a man showing off his cool new t shirt at the beach, a shark is jumping out of the water in the background"
- "a bear building a log cabin in the snow covered mountains"
- "woman playing the guitar, on stage, singing a song, laser lights, punk rocker"
- "hipster man with a beard, building a chair, in a wood shop"
- "photo of a man, white background, medium shot, modeling clothing, studio lighting, white backdrop"
- "a man holding a sign that says, 'this is a sign'"
- "a bulldog, in a post apocalyptic world, with a shotgun, in a leather jacket, in a desert, with a motorcycle"
neg: "" # not used on flex2
seed: 42
walk_seed: true
guidance_scale: 4
sample_steps: 25
# you can add any additional meta info here. [name] is replaced with config name at top
meta:
name: "[name]"
version: '1.0'