# SynTalker/configs/beat2_rvqvae.yaml
# (hosting-page header: robinwitch, commit "update", 1da48bb)
# Run mode, output locations and project tag.
is_train: true
ddp: false
stat: ts
root_path: ./
out_path: ./outputs/audio2pose/
project: s2g

# Dataset roots.
data_path: ./datasets/BEAT_SMPL/beat_v2.0.0/beat_english_v2.0.0/
data_path_1: ./datasets/hub/

# Evaluation-model keys (e_* / eval_model) and the test checkpoint path.
# NOTE(review): e_path names "AESKConv" weights while e_name is "VAESKConv";
# confirm this pairing is intended.
e_path: weights/AESKConv_240_100.bin
e_name: VAESKConv
eval_model: motion_representation
test_ckpt: ./outputs/audio2pose/custom/0112_001634_emage/last_200.bin

# vae_* settings.
vae_test_len: 32
vae_test_dim: 330
vae_test_stride: 20
vae_length: 240
vae_codebook_size: 256
vae_layer: 4
vae_grow: [1, 1, 2, 1]
variational: false
# data config
# Only speaker id 2 is enabled. The full list is kept here for reference:
#   [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
training_speakers: [2]
additional_data: false
cache_path: datasets/beat_cache/beat_smplx_en_emage_2_rvqvae/
dataset: mix_sep
new_cache: true
use_amass: false
# motion config
ori_joints: beat_smplx_joints
tar_joints: beat_smplx_full
pose_rep: smplxflame_30
pose_norm: false
pose_fps: 30
rot6d: true
pre_frames: 4
pose_dims: 330
pose_length: 64
stride: 20
test_length: 64
motion_f: 256
# Both motion encoder slots are set to null.
m_pre_encoder: null
m_encoder: null
m_fix_pre: false
# audio config
audio_rep: onset+amplitude
audio_sr: 16000
audio_fps: 16000
audio_norm: false
audio_f: 256
# Audio encoder options, currently disabled:
# a_pre_encoder: tcn_camn
# a_encoder: none
# a_fix_pre: False
# text config
word_rep: textgrid
word_index_num: 11195
word_dims: 300
freeze_wordembed: false
word_f: 256
# Only the pre-encoder is named; the encoder slot is set to null.
t_pre_encoder: fasttext
t_encoder: null
t_fix_pre: false
# facial config
facial_rep: smplxflame_30
facial_dims: 100
facial_norm: false
facial_f: 0
# Both facial encoder slots are set to null.
f_pre_encoder: null
f_encoder: null
f_fix_pre: false
# speaker config
# NOTE(review): presumably `id_rep` selects how speaker ids are encoded and
# `speaker_f` is the speaker feature size (0 here) — confirm against the consumer.
id_rep: onehot
speaker_f: 0
# model config
batch_size: 80  # 80
# warmup_epochs: 1
# warmup_lr: 1.0e-6
# Keep the dot in the mantissa: YAML 1.1 loaders (e.g. PyYAML) resolve a bare
# `4e-4` as the string "4e-4" rather than the float 0.0004.
lr_base: 4.0e-4
model: motion_representation
g_name: VQVAEConvZero
trainer: ae_total
hidden_size: 768
n_layer: 1
rec_weight: 1
grad_norm: 0.99
epochs: 200
test_period: 20
# NOTE(review): ll/lf/lu/lh and cl/cf/cu/ch are not explained in this file;
# presumably per-body-part settings — confirm against the trainer.
ll: 3
lf: 3
lu: 3
lh: 3
cl: 1
cf: 0
cu: 1
ch: 1
# Below is the VQ-VAE config, copied from QPGesture.
# Codebook configs
levels: 1
downs_t: [3]
strides_t: [2]
emb_width: 512
l_bins: 512
l_mu: 0.99
commit: 0.1
hvqvae_multipliers: [1]
width: 512
depth: 3
m_conv: 1.0
dilation_growth_rate: 3
sample_length: 80
use_bottleneck: true
joint_channel: 6
# Alternative values, kept disabled for reference:
# depth: 3
# width: 128
# m_conv: 1.0
# dilation_growth_rate: 1
# dilation_cycle: None
vel: 1  # 1 -> 0
acc: 1  # 1 -> 0
vqvae_reverse_decoder_dilation: true
# Below is specific to EMAGE.
rec_pos_weight: 1.0