WangHelin1997 committed on
Commit
aca1242
·
1 Parent(s): 614774d
Files changed (1) hide show
  1. nar_pretrain.yaml +71 -0
nar_pretrain.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: pretrain
2
+
3
+ model:
4
+ dim: 1024
5
+ depth: 24
6
+ heads: 16
7
+ ff_mult: 4
8
+ text_dim: 512
9
+ # disable ConvNeXt layers in the text embedding
10
+ conv_layers: 0
11
+ # phoneme vocab size
12
+ text_num_embeds: 200
13
+ mel_dim: 100
14
+ t5_dim: 1024
15
+ clap_dim: 512
16
+ # disable use_checkpoint on A100
17
+ use_checkpoint: false
18
+ qk_norm: true
19
+ skip: true
20
+
21
+ mel:
22
+ target_sample_rate: 24000
23
+ n_mel_channels: 100
24
+ hop_length: 256
25
+
26
+ opt:
27
+ learning_rate: 2.0e-04
28
+ beta1: 0.9
29
+ beta2: 0.999
30
+ weight_decay: 0.01
31
+ adam_epsilon: 1.0e-08
32
+ grad_clip: 1.0
33
+ batch_size: 64
34
+ accumulation_steps: 1
35
+ # mask_range: [0.7, 1.0]
36
+ drop_spk: 0.1
37
+ drop_text: 0.5
38
+
39
+ lr_scheduler:
40
+ warmup_steps: 5000
41
+ decay_steps: 150000
42
+ end_factor: 1.0e-02
43
+
44
+ data:
45
+ trainset:
46
+ dataset_dir: "" # path to your processed dataset directory
47
+ clap_emb_dir: "./data/clap_embs/"
48
+ t5_folder_name: "t5"
49
+ phn_folder_name: "g2p"
50
+ manifest_name: "manifest"
51
+ json_name: "jsons"
52
+ dynamic_batching: true
53
+ text_pad_token: -1
54
+ audio_pad_token: 0.0
55
+ split: "train_PT"
56
+ sr: 24000
57
+ norm_audio: false
58
+
59
+ valset:
60
+ dataset_dir: "" # path to your processed dataset directory
61
+ clap_emb_dir: "./data/clap_embs/"
62
+ t5_folder_name: "t5"
63
+ phn_folder_name: "g2p"
64
+ manifest_name: "manifest"
65
+ json_name: "jsons"
66
+ dynamic_batching: true
67
+ text_pad_token: -1
68
+ audio_pad_token: 0.0
69
+ split: "validation_PT"
70
+ sr: 24000
71
+ norm_audio: false