# SynTalker/ckpt/beatx2_cospeech_diffusion/0403_212319_diffusion_rvqvae_128.yaml
{a_encoder: null, a_fix_pre: false, a_pre_encoder: null, acc: 1, acc_weight: 0.0,
additional_data: false, adv_weight: 20.0, ali_weight: 0.0, amsgrad: false, apex: false,
asmr: 0.0, atcont: 0.0, atmr: 0.0, aud_prob: 1.0, audio_dims: 1, audio_f: 256, audio_fps: 16000,
audio_norm: false, audio_rep: onset+amplitude, audio_sr: 16000, batch_size: 40,
beat_align: true, benchmark: true, cache_only: false, cache_path: datasets/beat_cache/beat_smplx_en_emage_2_128/,
cf: 0.0, ch: 1.0, cl: 1.0, clean_final_seconds: 0, clean_first_seconds: 0, commit: 0.02,
config: configs/diffusion_rvqvae_128.yaml, csv_name: a2g_0, cu: 1.0, cudnn_enabled: true,
d_lr_weight: 0.2, d_name: null, data_path: /mnt/fu09a/chenbohong/PantoMatrix/scripts/EMAGE_2024/datasets/BEAT_SMPL/beat_v2.0.0/beat_english_v2.0.0/,
data_path_1: /mnt/fu09a/chenbohong/PantoMatrix/scripts/EMAGE_2024/datasets/hub/,
dataset: beat_sep_lower, ddp: false, debug: false, decay_epochs: 200, decay_rate: 0.1,
decode_fusion: null, depth: 3, deterministic: true, dilation_growth_rate: 3, disable_filtering: false,
div_reg_weight: 0.0, downs_t: [3], dropout_prob: 0.3, e_name: VAESKConv, e_path: weights/AESKConv_240_100.bin,
emb_width: 512, emo_rep: null, emotion_dims: 8, emotion_f: 0, epoch_stage: 0, epochs: 1000,
eval_model: motion_representation, f_encoder: 'null', f_fix_pre: false, f_pre_encoder: 'null',
fac_prob: 1.0, facial_dims: 100, facial_f: 0, facial_fps: 15, facial_norm: false,
facial_rep: smplxflame_30, fid_weight: 0.0, finger_net: original, freeze_wordembed: false,
fsmr: 0.0, ftmr: 0.0, fusion_mode: sum, g_name: MDM, gap_weight: 0.0, gpus: [0],
grad_norm: 0.99, hidden_size: 768, hvqvae_multipliers: [1], id_rep: onehot, input_context: both,
is_train: true, ita_weight: 0.0, iwa_weight: 0.0, joint_channel: 3, kld_aud_weight: 0.0,
kld_fac_weight: 0.0, kld_weight: 0.0, l: 4, l_bins: 512, l_mu: 0.99, levels: 1,
lf: 3.0, lh: 3.0, ll: 3.0, loader_workers: 0, log_period: 10, loss_contrastive_neg_weight: 0.005,
loss_contrastive_pos_weight: 0.2, loss_gan_weight: 5.0, loss_kld_weight: 0.1, loss_physical_weight: 0.0,
loss_reg_weight: 0.05, loss_regression_weight: 70.0, lr_base: 5.0e-05, lr_min: 1.0e-07,
lr_policy: step, lu: 3.0, m_conv: 1.0, m_decoder: null, m_encoder: 'null', m_fix_pre: false,
m_pre_encoder: 'null', mean_pose_path: /mnt/fu09a/chenbohong/PantoMatrix/beatx_2_330_mean.npy,
mean_trans_path: /mnt/fu09a/chenbohong/PantoMatrix/beatx_2_trans_mean.npy, model: denoiser,
momentum: 0.8, motion_f: 256, msmr: 0.0, mtmr: 0.0, multi_length_training: [1.0],
n_layer: 1, n_poses: 34, n_pre_poses: 4, name: 0403_212319_diffusion_rvqvae_128,
nesterov: true, new_cache: false, no_adv_epoch: 999, notes: '', opt: adam, opt_betas: [
0.5, 0.999], ori_joints: beat_smplx_joints, out_path: /mnt/fu09a/chenbohong/PantoMatrix/scripts/EMAGE_2024/outputs/audio2pose/,
pos_encoding_type: sin, pos_prob: 1.0, pose_dims: 330, pose_fps: 30, pose_length: 128,
pose_norm: true, pose_rep: smplxflame_30, pre_frames: 4, pre_type: zero, pretrain: false,
project: s2g, queue_size: 1024, random_seed: 2021, rec_aud_weight: 0.0, rec_fac_weight: 0.0,
rec_pos_weight: 0.0, rec_txt_weight: 0.0, rec_ver_weight: 0.0, rec_weight: 1.0,
root_path: /mnt/fu09a/chenbohong/PantoMatrix/scripts/EMAGE_2024/, root_weight: 1.0,
rot6d: true, sample_length: 34, sem_rep: null, sparse: 1, speaker_dims: 4, speaker_f: 0,
speaker_id: onehot, stat: ts, std_pose_path: /mnt/fu09a/chenbohong/PantoMatrix/beatx_2_330_std.npy,
std_trans_path: /mnt/fu09a/chenbohong/PantoMatrix/beatx_2_trans_std.npy, stride: 20,
strides_t: [2], t_encoder: 'null', t_fix_pre: false, t_pre_encoder: fasttext, tar_joints: beat_smplx_full,
test_ckpt: /mnt/fu09a/chenbohong/PantoMatrix/scripts/EMAGE_2024/outputs/audio2pose/custom/0330_140056_diffusion_rvqvae/last_300.bin,
test_data_path: /datasets/trinity/test/, test_length: 128, test_period: 20, train_data_path: /datasets/trinity/train/,
train_trans: true, trainer: diffusion_rvqvae, training_speakers: [2], tsmr: 0.0,
ttmr: 0.0, txt_prob: 1.0, use_amass: false, use_aug: false, use_bottleneck: true,
use_trans: true, vae_codebook_size: 256, vae_grow: [1, 1, 2, 1], vae_layer: 4, vae_length: 240,
vae_quantizer_lambda: 1.0, vae_test_dim: 330, vae_test_len: 32, vae_test_stride: 20,
val_data_path: /datasets/trinity/val/, variational: false, vel: 1, vel_weight: 0.0,
vqvae_ckpt: null, vqvae_hands_path: /mnt/fu09a/chenbohong/gdc/T2M-GPT/output_beatx2/RVQVAE_hands/net_300000.pth,
vqvae_latent_scale: 5.0, vqvae_lower_path: /mnt/fu09a/chenbohong/gdc/T2M-GPT/output_beatx2/RVQVAE_lower/net_300000.pth,
vqvae_lower_trans_path: /mnt/fu09a/chenbohong/gdc/T2M-GPT/output_beatx2/RVQVAE_lower_trans/net_300000.pth,
vqvae_reverse_decoder_dilation: true, vqvae_squeeze_scale: 4, vqvae_type: rvqvae,
vqvae_upper_path: /mnt/fu09a/chenbohong/gdc/T2M-GPT/output_beatx2/RVQVAE_upper/net_300000.pth,
warmup_epochs: 0, warmup_lr: 0.0005, wei_weight: 0.0, weight_decay: 0.0, width: 512,
word_cache: false, word_dims: 300, word_f: 256, word_index_num: 11195, word_rep: textgrid,
z_type: speaker}
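# Usage note (not part of the original checkpoint config): this file is a flat
# flow-style YAML mapping of training hyperparameters. EMAGE/PantoMatrix-style
# trainers generally expose such keys as attributes on an args/namespace object;
# the sketch below is a minimal, hypothetical way to inspect the file with PyYAML
# and may differ from the actual SynTalker config loader.
#
#     import yaml
#     from types import SimpleNamespace
#
#     with open("0403_212319_diffusion_rvqvae_128.yaml") as f:
#         cfg = SimpleNamespace(**yaml.safe_load(f))  # flow mapping -> attribute access
#
#     print(cfg.g_name, cfg.pose_length, cfg.vqvae_type)  # MDM 128 rvqvae
#
# Several values (data_path, test_ckpt, the vqvae_*_path entries, mean/std pose
# paths) are absolute paths from the original author's machine and must be
# adjusted to point at a local copy of the BEAT/EMAGE data and RVQ-VAE weights.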