jiangdongwei committed on
Commit
3f3aa3a
·
1 Parent(s): 7564d88

add README

Browse files
Files changed (1) hide show
  1. README.md +826 -0
README.md ADDED
@@ -0,0 +1,826 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - LIUM/tedlium
4
+ language:
5
+ - en
6
+ metrics:
7
+ - wer
8
+ library_name: espnet
9
+ pipeline_tag: automatic-speech-recognition
10
+ ---
11
+
12
+ ## ESPnet2 ASR model
13
+
14
+ ### `espnet/dongwei_tedlium3_asr_e-branchformer_external_lm`
15
+
16
+ This model was trained by Dongwei Jiang using the tedlium3 recipe in [ESPnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ ```bash
21
+ cd espnet
22
+ git checkout ff841366229d539eb74d23ac999cae7c0cc62cad
23
+ pip install -e .
24
+ cd egs2/tedlium3/asr1
25
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/dongwei_tedlium3_asr_e-branchformer_external_lm
26
+ ```
27
+
28
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
29
+ # RESULTS
30
+ ## Environments
31
+ - date: `Tue Apr 11 01:15:36 EDT 2023`
32
+ - python version: `3.8.16 (default, Mar 2 2023, 03:21:46) [GCC 11.2.0]`
33
+ - espnet version: `espnet 202301`
34
+ - pytorch version: `pytorch 1.8.1`
35
+ - Git hash: `b0cceeac2ecd330e8270789cef945e49058858fa`
36
+ - Commit date: `Thu Mar 30 08:26:54 2023 -0400`
37
+
38
+ ## exp/asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp
39
+ ### WER
40
+
41
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
42
+ |---|---|---|---|---|---|---|---|---|
43
+ |decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave/test|1155|27500|94.2|2.5|3.3|0.6|6.4|59.2|
44
+
45
+ ### CER
46
+
47
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
48
+ |---|---|---|---|---|---|---|---|---|
49
+ |decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave/test|1155|145066|96.8|0.5|2.7|0.6|3.8|59.2|
50
+
51
+ ### TER
52
+
53
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
54
+ |---|---|---|---|---|---|---|---|---|
55
+ |decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave/test|1155|54206|95.8|1.6|2.6|0.5|4.7|59.2|
56
+
57
+ ## exp/asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave
58
+ ### WER
59
+
60
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
61
+ |---|---|---|---|---|---|---|---|---|
62
+ |org/dev|507|17783|93.6|3.1|3.3|0.9|7.3|69.0|
63
+
64
+ ### CER
65
+
66
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
67
+ |---|---|---|---|---|---|---|---|---|
68
+ |org/dev|507|95429|96.5|0.7|2.8|0.8|4.4|69.0|
69
+
70
+ ### TER
71
+
72
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
73
+ |---|---|---|---|---|---|---|---|---|
74
+ |org/dev|507|36002|95.4|2.0|2.6|0.8|5.5|69.0|
75
+
76
+
77
+ ## ASR config
78
+
79
+ <details><summary>expand</summary>
80
+
81
+ ```
82
+ config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_e12_mactrue.yaml
83
+ print_config: false
84
+ log_level: INFO
85
+ dry_run: false
86
+ iterator_type: sequence
87
+ output_dir: exp/asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp
88
+ ngpu: 1
89
+ seed: 2022
90
+ num_workers: 6
91
+ num_att_plot: 3
92
+ dist_backend: nccl
93
+ dist_init_method: env://
94
+ dist_world_size: 4
95
+ dist_rank: 0
96
+ local_rank: 0
97
+ dist_master_addr: localhost
98
+ dist_master_port: 33461
99
+ dist_launcher: null
100
+ multiprocessing_distributed: true
101
+ unused_parameters: false
102
+ sharded_ddp: false
103
+ cudnn_enabled: true
104
+ cudnn_benchmark: false
105
+ cudnn_deterministic: true
106
+ collect_stats: false
107
+ write_collected_feats: false
108
+ max_epoch: 50
109
+ patience: null
110
+ val_scheduler_criterion:
111
+ - valid
112
+ - loss
113
+ early_stopping_criterion:
114
+ - valid
115
+ - loss
116
+ - min
117
+ best_model_criterion:
118
+ - - valid
119
+ - acc
120
+ - max
121
+ keep_nbest_models: 10
122
+ nbest_averaging_interval: 0
123
+ grad_clip: 5.0
124
+ grad_clip_type: 2.0
125
+ grad_noise: false
126
+ accum_grad: 1
127
+ no_forward_run: false
128
+ resume: true
129
+ train_dtype: float32
130
+ use_amp: true
131
+ log_interval: null
132
+ use_matplotlib: true
133
+ use_tensorboard: true
134
+ create_graph_in_tensorboard: false
135
+ use_wandb: false
136
+ wandb_project: null
137
+ wandb_id: null
138
+ wandb_entity: null
139
+ wandb_name: null
140
+ wandb_model_log_interval: -1
141
+ detect_anomaly: false
142
+ pretrain_path: null
143
+ init_param: []
144
+ ignore_init_mismatch: false
145
+ freeze_param: []
146
+ num_iters_per_epoch: null
147
+ batch_size: 20
148
+ valid_batch_size: null
149
+ batch_bins: 50000000
150
+ valid_batch_bins: null
151
+ train_shape_file:
152
+ - exp/asr_stats_raw_en_bpe500_sp/train/speech_shape
153
+ - exp/asr_stats_raw_en_bpe500_sp/train/text_shape.bpe
154
+ valid_shape_file:
155
+ - exp/asr_stats_raw_en_bpe500_sp/valid/speech_shape
156
+ - exp/asr_stats_raw_en_bpe500_sp/valid/text_shape.bpe
157
+ batch_type: numel
158
+ valid_batch_type: null
159
+ fold_length:
160
+ - 80000
161
+ - 150
162
+ sort_in_batch: descending
163
+ sort_batch: descending
164
+ multiple_iterator: false
165
+ chunk_length: 500
166
+ chunk_shift_ratio: 0.5
167
+ num_cache_chunks: 1024
168
+ chunk_excluded_key_prefixes: []
169
+ train_data_path_and_name_and_type:
170
+ - - dump/raw/train_sp/wav.scp
171
+ - speech
172
+ - kaldi_ark
173
+ - - dump/raw/train_sp/text
174
+ - text
175
+ - text
176
+ valid_data_path_and_name_and_type:
177
+ - - dump/raw/dev/wav.scp
178
+ - speech
179
+ - kaldi_ark
180
+ - - dump/raw/dev/text
181
+ - text
182
+ - text
183
+ allow_variable_data_keys: false
184
+ max_cache_size: 0.0
185
+ max_cache_fd: 32
186
+ valid_max_cache_size: null
187
+ exclude_weight_decay: false
188
+ exclude_weight_decay_conf: {}
189
+ optim: adam
190
+ optim_conf:
191
+ lr: 0.002
192
+ weight_decay: 1.0e-06
193
+ scheduler: warmuplr
194
+ scheduler_conf:
195
+ warmup_steps: 15000
196
+ token_list:
197
+ - <blank>
198
+ - <unk>
199
+ - '[unk]'
200
+ - ▁
201
+ - s
202
+ - ▁the
203
+ - t
204
+ - ▁and
205
+ - e
206
+ - ▁a
207
+ - ▁to
208
+ - d
209
+ - ▁of
210
+ - ''''
211
+ - n
212
+ - ing
213
+ - ▁in
214
+ - ▁that
215
+ - re
216
+ - ▁i
217
+ - c
218
+ - o
219
+ - u
220
+ - ▁we
221
+ - y
222
+ - a
223
+ - ed
224
+ - ▁it
225
+ - ▁you
226
+ - i
227
+ - m
228
+ - ▁is
229
+ - er
230
+ - p
231
+ - g
232
+ - w
233
+ - al
234
+ - ▁this
235
+ - ▁so
236
+ - f
237
+ - le
238
+ - b
239
+ - ar
240
+ - ▁f
241
+ - k
242
+ - ▁c
243
+ - r
244
+ - in
245
+ - or
246
+ - ▁for
247
+ - ▁be
248
+ - ve
249
+ - ▁was
250
+ - te
251
+ - th
252
+ - ▁do
253
+ - es
254
+ - ly
255
+ - ▁they
256
+ - ro
257
+ - ▁are
258
+ - ▁with
259
+ - ▁have
260
+ - an
261
+ - v
262
+ - ch
263
+ - ▁on
264
+ - se
265
+ - lo
266
+ - ▁but
267
+ - en
268
+ - ri
269
+ - li
270
+ - ▁what
271
+ - it
272
+ - ic
273
+ - ▁can
274
+ - l
275
+ - ur
276
+ - ce
277
+ - ent
278
+ - ▁me
279
+ - ▁b
280
+ - ▁ma
281
+ - ▁he
282
+ - ra
283
+ - ▁de
284
+ - ll
285
+ - at
286
+ - ▁about
287
+ - ▁one
288
+ - ▁not
289
+ - ne
290
+ - ▁all
291
+ - ▁my
292
+ - ter
293
+ - el
294
+ - il
295
+ - ▁there
296
+ - 'on'
297
+ - ad
298
+ - ▁mo
299
+ - ol
300
+ - ation
301
+ - nd
302
+ - ▁like
303
+ - ▁people
304
+ - po
305
+ - ▁at
306
+ - ▁us
307
+ - us
308
+ - ▁g
309
+ - ci
310
+ - ▁our
311
+ - h
312
+ - pe
313
+ - ▁as
314
+ - ▁from
315
+ - vi
316
+ - ▁if
317
+ - as
318
+ - ▁ex
319
+ - ▁con
320
+ - ▁an
321
+ - ver
322
+ - ▁out
323
+ - ▁just
324
+ - un
325
+ - ▁see
326
+ - la
327
+ - ▁di
328
+ - ▁when
329
+ - ▁now
330
+ - ▁p
331
+ - ha
332
+ - ▁who
333
+ - ck
334
+ - ▁these
335
+ - ▁because
336
+ - ▁or
337
+ - ▁know
338
+ - ion
339
+ - ir
340
+ - ▁co
341
+ - ▁up
342
+ - ▁pa
343
+ - ment
344
+ - ▁think
345
+ - ge
346
+ - ▁how
347
+ - ide
348
+ - ▁by
349
+ - ul
350
+ - ity
351
+ - ▁go
352
+ - ▁get
353
+ - ▁ho
354
+ - ive
355
+ - ▁very
356
+ - ate
357
+ - ng
358
+ - ▁no
359
+ - ▁had
360
+ - ac
361
+ - ▁bo
362
+ - ry
363
+ - ▁more
364
+ - ▁them
365
+ - ▁some
366
+ - mi
367
+ - ▁time
368
+ - ▁your
369
+ - me
370
+ - ▁going
371
+ - op
372
+ - am
373
+ - per
374
+ - et
375
+ - ▁would
376
+ - ru
377
+ - ure
378
+ - ti
379
+ - ist
380
+ - ▁their
381
+ - x
382
+ - ▁were
383
+ - ▁look
384
+ - ▁pro
385
+ - ▁which
386
+ - ▁work
387
+ - tion
388
+ - est
389
+ - ty
390
+ - im
391
+ - z
392
+ - ta
393
+ - ▁want
394
+ - ▁two
395
+ - age
396
+ - ▁really
397
+ - om
398
+ - ma
399
+ - ers
400
+ - ting
401
+ - ▁world
402
+ - co
403
+ - ▁way
404
+ - ▁don
405
+ - wa
406
+ - hi
407
+ - tra
408
+ - ▁la
409
+ - ▁here
410
+ - able
411
+ - lu
412
+ - ▁other
413
+ - mo
414
+ - ies
415
+ - ▁has
416
+ - ▁could
417
+ - j
418
+ - ▁make
419
+ - ally
420
+ - ▁sta
421
+ - ten
422
+ - ▁will
423
+ - ▁un
424
+ - ig
425
+ - ▁where
426
+ - ▁into
427
+ - ke
428
+ - ▁than
429
+ - ▁comp
430
+ - ▁actually
431
+ - tic
432
+ - sh
433
+ - ▁did
434
+ - tor
435
+ - fa
436
+ - ical
437
+ - ▁she
438
+ - ▁years
439
+ - ▁say
440
+ - one
441
+ - ted
442
+ - ▁things
443
+ - ph
444
+ - ▁new
445
+ - ▁pre
446
+ - ▁any
447
+ - ▁thousand
448
+ - ▁been
449
+ - ▁inter
450
+ - ▁his
451
+ - ▁com
452
+ - ▁need
453
+ - nce
454
+ - ▁right
455
+ - ▁take
456
+ - ▁even
457
+ - ▁over
458
+ - ▁start
459
+ - ▁hundred
460
+ - min
461
+ - ▁sp
462
+ - ▁those
463
+ - ▁car
464
+ - ▁then
465
+ - mp
466
+ - ap
467
+ - ▁first
468
+ - les
469
+ - ize
470
+ - ▁every
471
+ - ba
472
+ - ▁something
473
+ - ▁well
474
+ - ard
475
+ - ▁str
476
+ - ▁back
477
+ - und
478
+ - ia
479
+ - pl
480
+ - ki
481
+ - ho
482
+ - ▁call
483
+ - ▁most
484
+ - ▁also
485
+ - bi
486
+ - ▁thing
487
+ - ▁life
488
+ - um
489
+ - ▁said
490
+ - ▁kind
491
+ - ▁lot
492
+ - ▁much
493
+ - va
494
+ - ▁ra
495
+ - ▁little
496
+ - ▁dr
497
+ - ▁got
498
+ - ▁come
499
+ - ful
500
+ - ▁talk
501
+ - ▁part
502
+ - ▁day
503
+ - ant
504
+ - ction
505
+ - ▁happen
506
+ - ▁only
507
+ - ▁many
508
+ - ▁wo
509
+ - pri
510
+ - ▁her
511
+ - ▁br
512
+ - qui
513
+ - ▁mean
514
+ - ▁three
515
+ - iv
516
+ - ▁different
517
+ - ugh
518
+ - ain
519
+ - ▁human
520
+ - ance
521
+ - ▁change
522
+ - ▁let
523
+ - ▁real
524
+ - ▁show
525
+ - ▁good
526
+ - ▁around
527
+ - ▁through
528
+ - ▁jo
529
+ - bu
530
+ - ▁down
531
+ - ight
532
+ - ga
533
+ - ▁why
534
+ - ▁live
535
+ - ff
536
+ - ▁tell
537
+ - ▁put
538
+ - ▁idea
539
+ - port
540
+ - ▁same
541
+ - ▁give
542
+ - ated
543
+ - ish
544
+ - ible
545
+ - ▁though
546
+ - ious
547
+ - ▁problem
548
+ - ▁five
549
+ - par
550
+ - ▁fact
551
+ - ▁cha
552
+ - ition
553
+ - ▁year
554
+ - ▁big
555
+ - ▁plan
556
+ - ▁great
557
+ - ▁find
558
+ - ▁four
559
+ - ▁app
560
+ - ▁after
561
+ - ▁system
562
+ - ▁place
563
+ - ▁em
564
+ - ▁build
565
+ - ▁percent
566
+ - ▁again
567
+ - ▁point
568
+ - ▁learn
569
+ - ▁own
570
+ - ▁long
571
+ - ▁made
572
+ - ▁today
573
+ - ▁nine
574
+ - ities
575
+ - ▁gene
576
+ - ▁six
577
+ - ▁question
578
+ - light
579
+ - ▁should
580
+ - ▁came
581
+ - ▁feel
582
+ - ▁turn
583
+ - ▁person
584
+ - ▁end
585
+ - ▁hu
586
+ - ▁design
587
+ - ▁help
588
+ - ▁brain
589
+ - ▁last
590
+ - ▁create
591
+ - ▁important
592
+ - ▁before
593
+ - ▁high
594
+ - ▁never
595
+ - ▁trans
596
+ - ▁another
597
+ - ▁him
598
+ - ▁eight
599
+ - ▁might
600
+ - ▁understand
601
+ - ▁power
602
+ - ▁better
603
+ - q
604
+ - ▁found
605
+ - ▁play
606
+ - ▁twenty
607
+ - ▁still
608
+ - ▁school
609
+ - ▁each
610
+ - ▁seven
611
+ - ▁together
612
+ - ▁few
613
+ - ▁hand
614
+ - ▁example
615
+ - que
616
+ - ▁next
617
+ - ▁million
618
+ - ▁story
619
+ - ▁women
620
+ - ▁under
621
+ - ▁number
622
+ - ▁course
623
+ - ▁water
624
+ - ▁ago
625
+ - ▁grow
626
+ - ▁between
627
+ - ▁develop
628
+ - ▁america
629
+ - ▁sort
630
+ - ▁technology
631
+ - ▁believe
632
+ - ▁second
633
+ - ▁small
634
+ - ▁maybe
635
+ - ▁become
636
+ - press
637
+ - ▁health
638
+ - ▁space
639
+ - ▁word
640
+ - ▁hard
641
+ - ▁children
642
+ - ▁organ
643
+ - ▁always
644
+ - ▁country
645
+ - ▁reason
646
+ - ▁experience
647
+ - ▁large
648
+ - ▁everything
649
+ - ▁friend
650
+ - ▁project
651
+ - ▁computer
652
+ - ▁fifty
653
+ - ▁money
654
+ - ▁information
655
+ - graph
656
+ - ▁walk
657
+ - ization
658
+ - ▁africa
659
+ - ▁picture
660
+ - ▁process
661
+ - ▁teach
662
+ - ▁enough
663
+ - ▁elect
664
+ - ▁thirty
665
+ - '0'
666
+ - '1'
667
+ - '2'
668
+ - '9'
669
+ - '3'
670
+ - '5'
671
+ - '8'
672
+ - '4'
673
+ - '7'
674
+ - '6'
675
+ - '&'
676
+ - +
677
+ - '#'
678
+ - '@'
679
+ - '*'
680
+ - \
681
+ - ^
682
+ - R
683
+ - _
684
+ - '-'
685
+ - '%'
686
+ - '='
687
+ - $
688
+ - M
689
+ - ā
690
+ - ']'
691
+ - E
692
+ - U
693
+ - A
694
+ - G
695
+ - '['
696
+ - <sos/eos>
697
+ init: null
698
+ input_size: null
699
+ ctc_conf:
700
+ dropout_rate: 0.0
701
+ ctc_type: builtin
702
+ reduce: true
703
+ ignore_nan_grad: null
704
+ zero_infinity: true
705
+ joint_net_conf: null
706
+ use_preprocessor: true
707
+ token_type: bpe
708
+ bpemodel: data/en_token_list/bpe_unigram500/bpe.model
709
+ non_linguistic_symbols: null
710
+ cleaner: null
711
+ g2p: null
712
+ speech_volume_normalize: null
713
+ rir_scp: null
714
+ rir_apply_prob: 1.0
715
+ noise_scp: null
716
+ noise_apply_prob: 1.0
717
+ noise_db_range: '13_15'
718
+ short_noise_thres: 0.5
719
+ aux_ctc_tasks: []
720
+ frontend: default
721
+ frontend_conf:
722
+ n_fft: 512
723
+ win_length: 400
724
+ hop_length: 160
725
+ fs: 16k
726
+ specaug: specaug
727
+ specaug_conf:
728
+ apply_time_warp: true
729
+ time_warp_window: 5
730
+ time_warp_mode: bicubic
731
+ apply_freq_mask: true
732
+ freq_mask_width_range:
733
+ - 0
734
+ - 27
735
+ num_freq_mask: 2
736
+ apply_time_mask: true
737
+ time_mask_width_ratio_range:
738
+ - 0.0
739
+ - 0.05
740
+ num_time_mask: 5
741
+ normalize: global_mvn
742
+ normalize_conf:
743
+ stats_file: exp/asr_stats_raw_en_bpe500_sp/train/feats_stats.npz
744
+ model: espnet
745
+ model_conf:
746
+ ctc_weight: 0.3
747
+ lsm_weight: 0.1
748
+ length_normalized_loss: false
749
+ preencoder: null
750
+ preencoder_conf: {}
751
+ encoder: e_branchformer
752
+ encoder_conf:
753
+ output_size: 256
754
+ attention_heads: 4
755
+ attention_layer_type: rel_selfattn
756
+ pos_enc_layer_type: rel_pos
757
+ rel_pos_type: latest
758
+ cgmlp_linear_units: 1024
759
+ cgmlp_conv_kernel: 31
760
+ use_linear_after_conv: false
761
+ gate_activation: identity
762
+ num_blocks: 12
763
+ dropout_rate: 0.1
764
+ positional_dropout_rate: 0.1
765
+ attention_dropout_rate: 0.1
766
+ input_layer: conv2d
767
+ layer_drop_rate: 0.0
768
+ linear_units: 1024
769
+ positionwise_layer_type: linear
770
+ use_ffn: true
771
+ macaron_ffn: true
772
+ merge_conv_kernel: 31
773
+ postencoder: null
774
+ postencoder_conf: {}
775
+ decoder: transformer
776
+ decoder_conf:
777
+ attention_heads: 4
778
+ linear_units: 2048
779
+ num_blocks: 6
780
+ dropout_rate: 0.1
781
+ positional_dropout_rate: 0.1
782
+ self_attention_dropout_rate: 0.1
783
+ src_attention_dropout_rate: 0.1
784
+ preprocessor: default
785
+ preprocessor_conf: {}
786
+ required:
787
+ - output_dir
788
+ - token_list
789
+ version: '202301'
790
+ distributed: true
791
+ ```
792
+
793
+ </details>
794
+
795
+
796
+
797
+ ### Citing ESPnet
798
+
799
+ ```bibtex
800
+ @inproceedings{watanabe2018espnet,
801
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
802
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
803
+ year={2018},
804
+ booktitle={Proceedings of Interspeech},
805
+ pages={2207--2211},
806
+ doi={10.21437/Interspeech.2018-1456},
807
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
808
+ }
809
+
810
+
811
+
812
+
813
+ ```
814
+
815
+ or arXiv:
816
+
817
+ ```bibtex
818
+ @misc{watanabe2018espnet,
819
+ title={ESPnet: End-to-End Speech Processing Toolkit},
820
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
821
+ year={2018},
822
+ eprint={1804.00015},
823
+ archivePrefix={arXiv},
824
+ primaryClass={cs.CL}
825
+ }
826
+ ```