ersdd
/

temp

Model card Files Files and versions Community

ersdd commited on Jan 14

Commit

b2859bf

verified ·

1 Parent(s): dbff61b

Delete cldm.yaml

Browse files

Files changed (1) hide show

cldm.yaml +0 -106

cldm.yaml DELETED Viewed

@@ -1,106 +0,0 @@
-target: model.cldm.ControlLDM
-params:
-  linear_start: 0.00085
-  linear_end: 0.0120
-  num_timesteps_cond: 1
-  log_every_t: 200
-  timesteps: 1000
-  first_stage_key: "jpg"
-  cond_stage_key: "txt"
-  control_key: "hint"
-  image_size: 64
-  channels: 4
-  cond_stage_trainable: false
-  conditioning_key: crossattn
-  monitor: val/loss_simple_ema
-  scale_factor: 0.18215
-  use_ema: False
-  sd_locked: True
-  only_mid_control: False
-  # Learning rate.
-  learning_rate: 1e-4
-  control_stage_config:
-    target: model.cldm.ControlNet
-    params:
-      use_checkpoint: True
-      image_size: 32 # unused
-      in_channels: 4
-      hint_channels: 4
-      model_channels: 320
-      attention_resolutions: [ 4, 2, 1 ]
-      num_res_blocks: 2
-      channel_mult: [ 1, 2, 4, 4 ]
-      num_head_channels: 64 # need to fix for flash-attn
-      use_spatial_transformer: True
-      use_linear_in_transformer: True
-      transformer_depth: 1
-      context_dim: 1024
-      legacy: False
-  unet_config:
-    target: model.cldm.ControlledUnetModel
-    params:
-      use_checkpoint: True
-      image_size: 32 # unused
-      in_channels: 4
-      out_channels: 4
-      model_channels: 320
-      attention_resolutions: [ 4, 2, 1 ]
-      num_res_blocks: 2
-      channel_mult: [ 1, 2, 4, 4 ]
-      num_head_channels: 64 # need to fix for flash-attn
-      use_spatial_transformer: True
-      use_linear_in_transformer: True
-      transformer_depth: 1
-      context_dim: 1024
-      legacy: False
-  first_stage_config:
-    target: ldm.models.autoencoder.AutoencoderKL
-    params:
-      embed_dim: 4
-      monitor: val/rec_loss
-      ddconfig:
-        #attn_type: "vanilla-xformers"
-        double_z: true
-        z_channels: 4
-        resolution: 256
-        in_channels: 3
-        out_ch: 3
-        ch: 128
-        ch_mult:
-        - 1
-        - 2
-        - 4
-        - 4
-        num_res_blocks: 2
-        attn_resolutions: []
-        dropout: 0.0
-      lossconfig:
-        target: torch.nn.Identity
-  cond_stage_config:
-    target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
-    params:
-      freeze: True
-      layer: "penultimate"
-  preprocess_config:
-    target: model.swinir.SwinIR
-    params:
-      img_size: 64
-      patch_size: 1
-      in_chans: 3
-      embed_dim: 180
-      depths: [6, 6, 6, 6, 6, 6, 6, 6]
-      num_heads: [6, 6, 6, 6, 6, 6, 6, 6]
-      window_size: 8
-      mlp_ratio: 2
-      sf: 8
-      img_range: 1.0
-      upsampler: "nearest+conv"
-      resi_connection: "1conv"
-      unshuffle: True
-      unshuffle_scale: 8