# Initialization settings.
# NOTE(review): the original text was collapsed onto a single line, so the
# nesting below is reconstructed. It assumes every key from `mode` through
# `norm_clip` belongs under `init_config` — confirm against the consumer.
init_batch_size: 2
init_iters: 8

init_config:
  # option: "simple", "svd", "gradient"
  mode: "gradient"

  # option: "gaussian", "kaiming", "fan_out_kaiming", "xavier", "zeros",
  # "unit", "orthogonal"
  lora_A: "unit"
  # only needed when lora_A is "gaussian"
  lora_A_std: 0.01

  # option: "gaussian", "kaiming", "fan_out_kaiming", "xavier", "zeros",
  # "unit", "orthogonal"
  lora_B: "unit"
  # only needed when lora_B is "gaussian"
  lora_B_std: 0.01

  # option: "default", "stable", "unit", "normalized", "gd", "weightS"
  scale: "stable"
  # only needed when scale is "stable"
  stable_gamma: 2

  # option: "ArBr", "A2rBr", "ArB2r" (only needed when mode is "gradient")
  direction: "ArB2r"

  # option: "bf16", "fp32"
  dtype: "fp32"

  # norm clipping
  norm_clip: false