Audio-to-Audio
Safetensors
torch
lucadellalib committed on
Commit
f64e051
·
verified ·
1 Parent(s): 25c95ab

Upload 3 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ focalcodec.png filter=lfs diff=lfs merge=lfs -text
LibriTTS960_50Hz.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "encoder_name": "WavLM",
3
+ "encoder_config": {
4
+ "hidden_dims": [512, 512, 512, 512, 512, 512, 512],
5
+ "kernel_sizes": [10, 3, 3, 3, 3, 2, 2],
6
+ "strides": [5, 2, 2, 2, 2, 2, 2],
7
+ "num_layers": 6,
8
+ "dim": 1024,
9
+ "ffn_dim": 4096,
10
+ "num_heads": 16,
11
+ "num_buckets": 320,
12
+ "max_distance": 800,
13
+ "dropout": 0.0,
14
+ "conv_pos": 128,
15
+ "conv_pos_groups": 16
16
+ },
17
+ "compressor_name": "FocalEncoder",
18
+ "compressor_config": {
19
+ "input_dim": 1024,
20
+ "output_dim": 13,
21
+ "hidden_dims": [1024, 512, 256],
22
+ "downscale_factors": [1, 1, 1],
23
+ "focal_window": 7,
24
+ "focal_level": 2,
25
+ "focal_factor": 2,
26
+ "dropout": 0.0,
27
+ "use_post_norm": false,
28
+ "use_layerscale": false,
29
+ "layerscale_init": 0.0001,
30
+ "normalize_modulator": false
31
+ },
32
+ "quantizer_name": "BinarySphericalQuantizer",
33
+ "quantizer_config": {
34
+ "codebook_size": 8192
35
+ },
36
+ "decompressor_name": "FocalDecoder",
37
+ "decompressor_config": {
38
+ "input_dim": 13,
39
+ "output_dim": 1024,
40
+ "hidden_dims": [256, 512, 1024],
41
+ "upscale_factors": [1, 1, 1],
42
+ "focal_window": 7,
43
+ "focal_level": 2,
44
+ "focal_factor": 2,
45
+ "dropout": 0.0,
46
+ "use_post_norm": false,
47
+ "use_layerscale": false,
48
+ "layerscale_init": 0.0001,
49
+ "normalize_modulator": false
50
+ },
51
+ "decoder_name": "Vocos",
52
+ "decoder_config": {
53
+ "input_channels": 1024,
54
+ "num_layers": 8,
55
+ "dim": 512,
56
+ "ffn_dim": 1536,
57
+ "kernel_size": 7,
58
+ "padding": 3,
59
+ "layerscale_init": null,
60
+ "n_fft": 1024,
61
+ "hop_length": 320
62
+ }
63
+ }
LibriTTS960_50Hz.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2700a916ca8d1c11a899995ef8d451ee53a481486650c4b78cd96feff9ac77f0
3
+ size 568554604
focalcodec.png ADDED

Git LFS Details

  • SHA256: 93eefb4b78b4ee860c678e8408456516082ef4f6fcf9cce9a831e234ea260b84
  • Pointer size: 131 Bytes
  • Size of remote file: 406 kB