# SynTalker/ckpt/beatx2_cospeech_diffusion/0403_212319_diffusion_rvqvae_128.yaml
{a_encoder: null, a_fix_pre: false, a_pre_encoder: null, acc: 1, acc_weight: 0.0,
additional_data: false, adv_weight: 20.0, ali_weight: 0.0, amsgrad: false, apex: false,
asmr: 0.0, atcont: 0.0, atmr: 0.0, aud_prob: 1.0, audio_dims: 1, audio_f: 256, audio_fps: 16000,
audio_norm: false, audio_rep: onset+amplitude, audio_sr: 16000, batch_size: 40,
beat_align: true, benchmark: true, cache_only: false, cache_path: datasets/beat_cache/beat_smplx_en_emage_2_128/,
cf: 0.0, ch: 1.0, cl: 1.0, clean_final_seconds: 0, clean_first_seconds: 0, commit: 0.02,
config: configs/diffusion_rvqvae_128.yaml, csv_name: a2g_0, cu: 1.0, cudnn_enabled: true,
d_lr_weight: 0.2, d_name: null, data_path: /mnt/fu09a/chenbohong/PantoMatrix/scripts/EMAGE_2024/datasets/BEAT_SMPL/beat_v2.0.0/beat_english_v2.0.0/,
data_path_1: /mnt/fu09a/chenbohong/PantoMatrix/scripts/EMAGE_2024/datasets/hub/,
dataset: beat_sep_lower, ddp: false, debug: false, decay_epochs: 200, decay_rate: 0.1,
decode_fusion: null, depth: 3, deterministic: true, dilation_growth_rate: 3, disable_filtering: false,
div_reg_weight: 0.0, downs_t: [3], dropout_prob: 0.3, e_name: VAESKConv, e_path: weights/AESKConv_240_100.bin,
emb_width: 512, emo_rep: null, emotion_dims: 8, emotion_f: 0, epoch_stage: 0, epochs: 1000,
eval_model: motion_representation, f_encoder: 'null', f_fix_pre: false, f_pre_encoder: 'null',
fac_prob: 1.0, facial_dims: 100, facial_f: 0, facial_fps: 15, facial_norm: false,
facial_rep: smplxflame_30, fid_weight: 0.0, finger_net: original, freeze_wordembed: false,
fsmr: 0.0, ftmr: 0.0, fusion_mode: sum, g_name: MDM, gap_weight: 0.0, gpus: [0],
grad_norm: 0.99, hidden_size: 768, hvqvae_multipliers: [1], id_rep: onehot, input_context: both,
is_train: true, ita_weight: 0.0, iwa_weight: 0.0, joint_channel: 3, kld_aud_weight: 0.0,
kld_fac_weight: 0.0, kld_weight: 0.0, l: 4, l_bins: 512, l_mu: 0.99, levels: 1,
lf: 3.0, lh: 3.0, ll: 3.0, loader_workers: 0, log_period: 10, loss_contrastive_neg_weight: 0.005,
loss_contrastive_pos_weight: 0.2, loss_gan_weight: 5.0, loss_kld_weight: 0.1, loss_physical_weight: 0.0,
loss_reg_weight: 0.05, loss_regression_weight: 70.0, lr_base: 5.0e-05, lr_min: 1.0e-07,
lr_policy: step, lu: 3.0, m_conv: 1.0, m_decoder: null, m_encoder: 'null', m_fix_pre: false,
m_pre_encoder: 'null', mean_pose_path: /mnt/fu09a/chenbohong/PantoMatrix/beatx_2_330_mean.npy,
mean_trans_path: /mnt/fu09a/chenbohong/PantoMatrix/beatx_2_trans_mean.npy, model: denoiser,
momentum: 0.8, motion_f: 256, msmr: 0.0, mtmr: 0.0, multi_length_training: [1.0],
n_layer: 1, n_poses: 34, n_pre_poses: 4, name: 0403_212319_diffusion_rvqvae_128,
nesterov: true, new_cache: false, no_adv_epoch: 999, notes: '', opt: adam, opt_betas: [
0.5, 0.999], ori_joints: beat_smplx_joints, out_path: /mnt/fu09a/chenbohong/PantoMatrix/scripts/EMAGE_2024/outputs/audio2pose/,
pos_encoding_type: sin, pos_prob: 1.0, pose_dims: 330, pose_fps: 30, pose_length: 128,
pose_norm: true, pose_rep: smplxflame_30, pre_frames: 4, pre_type: zero, pretrain: false,
project: s2g, queue_size: 1024, random_seed: 2021, rec_aud_weight: 0.0, rec_fac_weight: 0.0,
rec_pos_weight: 0.0, rec_txt_weight: 0.0, rec_ver_weight: 0.0, rec_weight: 1.0,
root_path: /mnt/fu09a/chenbohong/PantoMatrix/scripts/EMAGE_2024/, root_weight: 1.0,
rot6d: true, sample_length: 34, sem_rep: null, sparse: 1, speaker_dims: 4, speaker_f: 0,
speaker_id: onehot, stat: ts, std_pose_path: /mnt/fu09a/chenbohong/PantoMatrix/beatx_2_330_std.npy,
std_trans_path: /mnt/fu09a/chenbohong/PantoMatrix/beatx_2_trans_std.npy, stride: 20,
strides_t: [2], t_encoder: 'null', t_fix_pre: false, t_pre_encoder: fasttext, tar_joints: beat_smplx_full,
test_ckpt: /mnt/fu09a/chenbohong/PantoMatrix/scripts/EMAGE_2024/outputs/audio2pose/custom/0330_140056_diffusion_rvqvae/last_300.bin,
test_data_path: /datasets/trinity/test/, test_length: 128, test_period: 20, train_data_path: /datasets/trinity/train/,
train_trans: true, trainer: diffusion_rvqvae, training_speakers: [2], tsmr: 0.0,
ttmr: 0.0, txt_prob: 1.0, use_amass: false, use_aug: false, use_bottleneck: true,
use_trans: true, vae_codebook_size: 256, vae_grow: [1, 1, 2, 1], vae_layer: 4, vae_length: 240,
vae_quantizer_lambda: 1.0, vae_test_dim: 330, vae_test_len: 32, vae_test_stride: 20,
val_data_path: /datasets/trinity/val/, variational: false, vel: 1, vel_weight: 0.0,
vqvae_ckpt: null, vqvae_hands_path: /mnt/fu09a/chenbohong/gdc/T2M-GPT/output_beatx2/RVQVAE_hands/net_300000.pth,
vqvae_latent_scale: 5.0, vqvae_lower_path: /mnt/fu09a/chenbohong/gdc/T2M-GPT/output_beatx2/RVQVAE_lower/net_300000.pth,
vqvae_lower_trans_path: /mnt/fu09a/chenbohong/gdc/T2M-GPT/output_beatx2/RVQVAE_lower_trans/net_300000.pth,
vqvae_reverse_decoder_dilation: true, vqvae_squeeze_scale: 4, vqvae_type: rvqvae,
vqvae_upper_path: /mnt/fu09a/chenbohong/gdc/T2M-GPT/output_beatx2/RVQVAE_upper/net_300000.pth,
warmup_epochs: 0, warmup_lr: 0.0005, wei_weight: 0.0, weight_decay: 0.0, width: 512,
word_cache: false, word_dims: 300, word_f: 256, word_index_num: 11195, word_rep: textgrid,
z_type: speaker}
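# Usage note (not part of the original checkpoint config): this file is a flat
# flow-style YAML mapping of training hyperparameters. EMAGE/PantoMatrix-style
# trainers generally expose such keys as attributes on an args/namespace object;
# the sketch below is a minimal, hypothetical way to inspect the file with PyYAML
# and may differ from the actual SynTalker config loader.
#
#     import yaml
#     from types import SimpleNamespace
#
#     with open("0403_212319_diffusion_rvqvae_128.yaml") as f:
#         cfg = SimpleNamespace(**yaml.safe_load(f))  # flow mapping -> attribute access
#
#     print(cfg.g_name, cfg.pose_length, cfg.vqvae_type)  # MDM 128 rvqvae
#
# Several values (data_path, test_ckpt, the vqvae_*_path entries, mean/std pose
# paths) are absolute paths from the original author's machine and must be
# adjusted to point at a local copy of the BEAT/EMAGE data and RVQ-VAE weights.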