File size: 2,367 Bytes
1da48bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Run mode and filesystem locations.
# NOTE(review): the meaning of each key is defined by the config loader,
# which is not part of this file; confirm there before changing values.
is_train: True
ddp: False
stat: ts
root_path: ./
out_path: ./outputs/audio2pose/
project: s2g
data_path: ./datasets/BEAT_SMPL/beat_v2.0.0/beat_english_v2.0.0/
# Relative path without the leading "./" used by the other paths in this
# section; presumably resolved the same way - TODO confirm.
e_path:  weights/AESKConv_240_100.bin
# Same value as `model` in the model config section.
eval_model: motion_representation
e_name: VAESKConv
test_ckpt: ./outputs/audio2pose/custom/0112_001634_emage/last_200.bin
data_path_1: ./datasets/hub/

# VAE settings.
# vae_test_dim (330) equals pose_dims and vae_test_stride (20) equals stride
# in the motion config section; presumably these must be kept in sync -
# TODO confirm.
vae_test_len: 32
vae_test_dim: 330
vae_test_stride: 20
vae_length: 240
vae_codebook_size: 256
vae_layer: 4
# One entry per layer: four values for vae_layer: 4.
vae_grow: [1,1,2,1]
variational: False

# Data config
# Currently only speaker 2 is selected; the trailing comment keeps the
# alternative selection of speakers 1-30 for reference.
training_speakers: [2] # alternative: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
additional_data: False
# Relative path without a leading "./", unlike data_path and out_path.
cache_path: datasets/beat_cache/beat_smplx_en_emage_2_rvqvae/
dataset: mix_sep
# NOTE(review): presumably forces the cache at cache_path to be rebuilt -
# confirm against the dataset loader.
new_cache: True
use_amass: False
# Motion config
ori_joints: beat_smplx_joints
tar_joints: beat_smplx_full
# Same representation name as facial_rep in the facial config section.
pose_rep: smplxflame_30
pose_norm: False
pose_fps: 30
rot6d: True
pre_frames: 4
# Same value as vae_test_dim.
pose_dims: 330
# pose_length and test_length are both 64.
pose_length: 64
# Same value as vae_test_stride.
stride: 20
test_length: 64
# Same width (256) as audio_f and word_f.
motion_f: 256
# No motion pre-encoder / encoder configured (YAML null).
m_pre_encoder: null
m_encoder: null
m_fix_pre: False

# Audio config
audio_rep: onset+amplitude
# audio_sr and audio_fps are set to the same value (16000).
audio_sr: 16000
audio_fps: 16000
audio_norm: False
audio_f: 256
# Audio encoder keys are disabled in this config; left commented out so the
# loader's defaults (if any) apply.
# a_pre_encoder: tcn_camn
# a_encoder: none
# a_fix_pre: False

# Text config
word_rep: textgrid
# NOTE(review): presumably the vocabulary size and embedding width of the
# word embedding table - confirm against the model code.
word_index_num: 11195
word_dims: 300
freeze_wordembed: False
# Same width (256) as motion_f and audio_f.
word_f: 256
t_pre_encoder: fasttext
# No text encoder configured (YAML null).
t_encoder: null
t_fix_pre: False

# Facial config
# Same representation name as pose_rep in the motion config section.
facial_rep: smplxflame_30
facial_dims: 100
facial_norm: False
# Feature width is 0 here, whereas motion_f / audio_f / word_f are 256.
facial_f: 0
# No facial pre-encoder / encoder configured (YAML null).
f_pre_encoder: null
f_encoder: null
f_fix_pre: False

# Speaker config
id_rep: onehot
# Feature width is 0 here, as with facial_f.
speaker_f: 0

# Model config
batch_size: 80  #80
# Warm-up keys are disabled in this config.
# warmup_epochs: 1
# warmup_lr: 1e-6
# NOTE(review): `4e-4` has no decimal point, so YAML 1.1 loaders such as
# PyYAML resolve it as a string rather than a float. Confirm the consumer
# casts this value to float; otherwise write it as 4.0e-4.
lr_base: 4e-4
# Same value as eval_model.
model: motion_representation
g_name: VQVAEConvZero
trainer: ae_total
hidden_size: 768
n_layer: 1
 
# Loss weight, gradient setting and training schedule.
rec_weight: 1
grad_norm: 0.99
epochs: 200
# 200 epochs with test_period 20 divides evenly into 10 periods.
test_period: 20
# NOTE(review): two parallel key families, l* and c*, each with the suffixes
# l / f / u / h. The suffixes presumably stand for lower body, face, upper
# body and hands - confirm against the trainer. Only cf is 0.
ll: 3
lf: 3
lu: 3
lh: 3
cl: 1
cf: 0
cu: 1
ch: 1



# Below is the VQ-VAE config, copied from QPGesture.
# Codebook configs
levels: 1
# downs_t, strides_t and hvqvae_multipliers each hold one entry, matching
# levels: 1.
downs_t: [3]
strides_t : [2]
# emb_width, l_bins and width are all 512 in this config.
emb_width : 512
l_bins : 512
l_mu : 0.99
commit : 0.1
hvqvae_multipliers : [1]
width: 512
depth: 3
m_conv : 1.0
dilation_growth_rate : 3
sample_length: 80
use_bottleneck: True
joint_channel: 6
# Alternative values kept for reference; the active depth / width / m_conv /
# dilation_growth_rate settings are the uncommented ones above.
# If dilation_cycle is re-enabled, note that YAML reads `None` as the string
# "None"; use `null` (as m_encoder does) for an empty value.
# depth: 3
# width: 128
# m_conv: 1.0
# dilation_growth_rate: 1
# dilation_cycle: None
# NOTE(review): the "1 -> 0" remarks are from the original author; presumably
# they record an intended or past change of these values - confirm.
vel: 1      # 1 -> 0
acc: 1      # 1 -> 0
vqvae_reverse_decoder_dilation: True


## Settings below are specific to EMAGE.
rec_pos_weight : 1.0