# File size: 373 Bytes
# 2c6e7ae
# Audio and text preprocessing settings.
data:
    # Audio sample rate; presumably in Hz -- TODO confirm against the loader.
    sampling_rate: 32000
    # Length of one audio segment, in seconds (per the key name).
    segment_seconds: 10
    # Tokenizer identifier; the same string as model.decoder.text_decoder.
    tokenizer_type: 'HuggingFaceTB/SmolLM2-135M'
    # NOTE(review): presumably the fixed token length text is padded or
    # truncated to -- confirm against the consumer.
    text_tokenization_len: 129

# Model settings: audio encoder, text decoder, and overall model type.
model:
    encoder:
        # Name of the audio encoder to use.
        audioenc_name: 'HTSAT'
        # transformer_embed_dim and out_emb are set to the same value here.
        transformer_embed_dim: 768
        out_emb: 768
        # NOTE(review): presumably the projection width handed to the
        # decoder -- confirm it matches the text decoder's hidden size.
        d_proj: 576
    decoder:
        # Text decoder identifier; the same string as data.tokenizer_type.
        text_decoder: 'HuggingFaceTB/SmolLM2-135M'
        # NOTE(review): presumably the number of prefix positions given to
        # the decoder -- confirm against the model code.
        prefix_length: 389
    model_type: Mellow