# mellow / v0.yaml
# NOTE(review): the following is residue from the hosting page, not config:
#   soham97's picture · first · 6b09558 · raw · history blame · 373 Bytes
# NOTE(review): this file arrived with all indentation stripped, which left
# `data`, `model`, `encoder` and `decoder` as null keys with every setting at
# the top level. The nesting below is reconstructed from key order; confirm it
# against the loader that consumes this file, in particular where `d_proj`
# and `model_type` belong.

# Input settings for audio and text.
data:
  sampling_rate: 32000  # presumably Hz; TODO confirm
  segment_seconds: 10
  # Same identifier as model.decoder.text_decoder below.
  tokenizer_type: "HuggingFaceTB/SmolLM2-135M"
  text_tokenization_len: 129

# Model settings, split into encoder and decoder sections.
model:
  encoder:
    audioenc_name: 'HTSAT'
    transformer_embed_dim: 768
    out_emb: 768
    # NOTE(review): may belong directly under `model`; verify.
    d_proj: 576
  decoder:
    text_decoder: "HuggingFaceTB/SmolLM2-135M"
    prefix_length: 389
    # NOTE(review): may belong under `model` or at top level; verify.
    model_type: Mellow