Spaces:
Runtime error
Runtime error
Delete wav2vecasr/model/checkpoint-1200
Browse files- wav2vecasr/model/checkpoint-1200/config.json +0 -108
- wav2vecasr/model/checkpoint-1200/preprocessor_config.json +0 -9
- wav2vecasr/model/checkpoint-1200/pytorch_model.bin +0 -3
- wav2vecasr/model/checkpoint-1200/rng_state.pth +0 -3
- wav2vecasr/model/checkpoint-1200/scaler.pt +0 -3
- wav2vecasr/model/checkpoint-1200/scheduler.pt +0 -3
- wav2vecasr/model/checkpoint-1200/special_tokens_map.json +0 -1
- wav2vecasr/model/checkpoint-1200/tokenizer_config.json +0 -1
- wav2vecasr/model/checkpoint-1200/trainer_state.json +0 -106
- wav2vecasr/model/checkpoint-1200/training_args.bin +0 -3
- wav2vecasr/model/checkpoint-1200/vocab.json +0 -1
wav2vecasr/model/checkpoint-1200/config.json
DELETED
@@ -1,108 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_name_or_path": "/content/drive/MyDrive/NUS/Y4S1/Sound and Music Computing/CS4347 Project/Experiments/Wav2Vec Baselines/L2 Artic 3 Speakers: Baseline 2/wav2vec-baseline2-model-checkpoints/checkpoint-600",
|
3 |
-
"activation_dropout": 0.0,
|
4 |
-
"adapter_kernel_size": 3,
|
5 |
-
"adapter_stride": 2,
|
6 |
-
"add_adapter": false,
|
7 |
-
"apply_spec_augment": true,
|
8 |
-
"architectures": [
|
9 |
-
"Wav2Vec2ForCTC"
|
10 |
-
],
|
11 |
-
"attention_dropout": 0.1,
|
12 |
-
"bos_token_id": 1,
|
13 |
-
"classifier_proj_size": 256,
|
14 |
-
"codevector_dim": 768,
|
15 |
-
"contrastive_logits_temperature": 0.1,
|
16 |
-
"conv_bias": true,
|
17 |
-
"conv_dim": [
|
18 |
-
512,
|
19 |
-
512,
|
20 |
-
512,
|
21 |
-
512,
|
22 |
-
512,
|
23 |
-
512,
|
24 |
-
512
|
25 |
-
],
|
26 |
-
"conv_kernel": [
|
27 |
-
10,
|
28 |
-
3,
|
29 |
-
3,
|
30 |
-
3,
|
31 |
-
3,
|
32 |
-
2,
|
33 |
-
2
|
34 |
-
],
|
35 |
-
"conv_stride": [
|
36 |
-
5,
|
37 |
-
2,
|
38 |
-
2,
|
39 |
-
2,
|
40 |
-
2,
|
41 |
-
2,
|
42 |
-
2
|
43 |
-
],
|
44 |
-
"ctc_loss_reduction": "mean",
|
45 |
-
"ctc_zero_infinity": false,
|
46 |
-
"diversity_loss_weight": 0.1,
|
47 |
-
"do_stable_layer_norm": true,
|
48 |
-
"eos_token_id": 2,
|
49 |
-
"feat_extract_activation": "gelu",
|
50 |
-
"feat_extract_dropout": 0.0,
|
51 |
-
"feat_extract_norm": "layer",
|
52 |
-
"feat_proj_dropout": 0.0,
|
53 |
-
"feat_quantizer_dropout": 0.0,
|
54 |
-
"final_dropout": 0.0,
|
55 |
-
"gradient_checkpointing": false,
|
56 |
-
"hidden_act": "gelu",
|
57 |
-
"hidden_dropout": 0.1,
|
58 |
-
"hidden_size": 1024,
|
59 |
-
"initializer_range": 0.02,
|
60 |
-
"intermediate_size": 4096,
|
61 |
-
"layer_norm_eps": 1e-05,
|
62 |
-
"layerdrop": 0.0,
|
63 |
-
"mask_feature_length": 64,
|
64 |
-
"mask_feature_min_masks": 0,
|
65 |
-
"mask_feature_prob": 0.25,
|
66 |
-
"mask_time_length": 10,
|
67 |
-
"mask_time_min_masks": 2,
|
68 |
-
"mask_time_prob": 0.75,
|
69 |
-
"model_type": "wav2vec2",
|
70 |
-
"num_adapter_layers": 3,
|
71 |
-
"num_attention_heads": 16,
|
72 |
-
"num_codevector_groups": 2,
|
73 |
-
"num_codevectors_per_group": 320,
|
74 |
-
"num_conv_pos_embedding_groups": 16,
|
75 |
-
"num_conv_pos_embeddings": 128,
|
76 |
-
"num_feat_extract_layers": 7,
|
77 |
-
"num_hidden_layers": 24,
|
78 |
-
"num_negatives": 100,
|
79 |
-
"output_hidden_size": 1024,
|
80 |
-
"pad_token_id": 82,
|
81 |
-
"proj_codevector_dim": 768,
|
82 |
-
"tdnn_dilation": [
|
83 |
-
1,
|
84 |
-
2,
|
85 |
-
3,
|
86 |
-
1,
|
87 |
-
1
|
88 |
-
],
|
89 |
-
"tdnn_dim": [
|
90 |
-
512,
|
91 |
-
512,
|
92 |
-
512,
|
93 |
-
512,
|
94 |
-
1500
|
95 |
-
],
|
96 |
-
"tdnn_kernel": [
|
97 |
-
5,
|
98 |
-
3,
|
99 |
-
3,
|
100 |
-
1,
|
101 |
-
1
|
102 |
-
],
|
103 |
-
"torch_dtype": "float32",
|
104 |
-
"transformers_version": "4.17.0",
|
105 |
-
"use_weighted_layer_sum": false,
|
106 |
-
"vocab_size": 83,
|
107 |
-
"xvector_output_dim": 512
|
108 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wav2vecasr/model/checkpoint-1200/preprocessor_config.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"do_normalize": true,
|
3 |
-
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
4 |
-
"feature_size": 1,
|
5 |
-
"padding_side": "right",
|
6 |
-
"padding_value": 0.0,
|
7 |
-
"return_attention_mask": false,
|
8 |
-
"sampling_rate": 16000
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wav2vecasr/model/checkpoint-1200/pytorch_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:44bd3813b64d85faa8f88091f160cc107e340ee71372b470dd6c4b09cb00906d
|
3 |
-
size 1262269741
|
|
|
|
|
|
|
|
wav2vecasr/model/checkpoint-1200/rng_state.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:6686b1782523e688cd46835b2db33ae51a6ffd852401967b311db1a20efad2ee
|
3 |
-
size 14639
|
|
|
|
|
|
|
|
wav2vecasr/model/checkpoint-1200/scaler.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:662b2a6102fe369b78bf169eb2bcea08b4dc636d31dfd2652b32a63eda7e03e8
|
3 |
-
size 557
|
|
|
|
|
|
|
|
wav2vecasr/model/checkpoint-1200/scheduler.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:dcbd8882ac5e67f1b9d59f4eaa2583483d429dfecec5ce45fd99da4d06e47847
|
3 |
-
size 627
|
|
|
|
|
|
|
|
wav2vecasr/model/checkpoint-1200/special_tokens_map.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
|
|
|
|
wav2vecasr/model/checkpoint-1200/tokenizer_config.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
|
|
|
wav2vecasr/model/checkpoint-1200/trainer_state.json
DELETED
@@ -1,106 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"best_metric": 17.541647491946616,
|
3 |
-
"best_model_checkpoint": "/content/drive/MyDrive/NUS/Y4S1/Sound and Music Computing/CS4347 Project/Experiments/Wav2Vec Baselines/L2 Artic 3 Speakers: Baseline 2/wav2vec-baseline2-model-checkpoints/checkpoint-200",
|
4 |
-
"epoch": 4.411764705882353,
|
5 |
-
"global_step": 1200,
|
6 |
-
"is_hyper_param_search": false,
|
7 |
-
"is_local_process_zero": true,
|
8 |
-
"is_world_process_zero": true,
|
9 |
-
"log_history": [
|
10 |
-
{
|
11 |
-
"epoch": 0.74,
|
12 |
-
"learning_rate": 0.0001194,
|
13 |
-
"loss": 4.1351,
|
14 |
-
"step": 200
|
15 |
-
},
|
16 |
-
{
|
17 |
-
"epoch": 0.74,
|
18 |
-
"eval_loss": 1.9543204307556152,
|
19 |
-
"eval_per": 17.541647491946616,
|
20 |
-
"eval_runtime": 143.4439,
|
21 |
-
"eval_samples_per_second": 15.149,
|
22 |
-
"eval_steps_per_second": 1.896,
|
23 |
-
"step": 200
|
24 |
-
},
|
25 |
-
{
|
26 |
-
"epoch": 1.47,
|
27 |
-
"learning_rate": 0.0002394,
|
28 |
-
"loss": 1.7915,
|
29 |
-
"step": 400
|
30 |
-
},
|
31 |
-
{
|
32 |
-
"epoch": 1.47,
|
33 |
-
"eval_loss": 1.692239761352539,
|
34 |
-
"eval_per": 7.960756135308424,
|
35 |
-
"eval_runtime": 137.113,
|
36 |
-
"eval_samples_per_second": 15.848,
|
37 |
-
"eval_steps_per_second": 1.984,
|
38 |
-
"step": 400
|
39 |
-
},
|
40 |
-
{
|
41 |
-
"epoch": 2.21,
|
42 |
-
"learning_rate": 0.0002971387283236994,
|
43 |
-
"loss": 1.2246,
|
44 |
-
"step": 600
|
45 |
-
},
|
46 |
-
{
|
47 |
-
"epoch": 2.21,
|
48 |
-
"eval_loss": 0.5273078083992004,
|
49 |
-
"eval_per": 0.31805393535991217,
|
50 |
-
"eval_runtime": 136.1021,
|
51 |
-
"eval_samples_per_second": 15.966,
|
52 |
-
"eval_steps_per_second": 1.999,
|
53 |
-
"step": 600
|
54 |
-
},
|
55 |
-
{
|
56 |
-
"epoch": 2.94,
|
57 |
-
"learning_rate": 0.0002913872832369942,
|
58 |
-
"loss": 0.9433,
|
59 |
-
"step": 800
|
60 |
-
},
|
61 |
-
{
|
62 |
-
"epoch": 2.94,
|
63 |
-
"eval_loss": 0.41386935114860535,
|
64 |
-
"eval_per": 0.2565853269749339,
|
65 |
-
"eval_runtime": 136.2091,
|
66 |
-
"eval_samples_per_second": 15.953,
|
67 |
-
"eval_steps_per_second": 1.997,
|
68 |
-
"step": 800
|
69 |
-
},
|
70 |
-
{
|
71 |
-
"epoch": 3.68,
|
72 |
-
"learning_rate": 0.000285606936416185,
|
73 |
-
"loss": 0.8842,
|
74 |
-
"step": 1000
|
75 |
-
},
|
76 |
-
{
|
77 |
-
"epoch": 3.68,
|
78 |
-
"eval_loss": 0.3962230980396271,
|
79 |
-
"eval_per": 0.24980343554684897,
|
80 |
-
"eval_runtime": 139.9847,
|
81 |
-
"eval_samples_per_second": 15.523,
|
82 |
-
"eval_steps_per_second": 1.943,
|
83 |
-
"step": 1000
|
84 |
-
},
|
85 |
-
{
|
86 |
-
"epoch": 4.41,
|
87 |
-
"learning_rate": 0.00027982658959537567,
|
88 |
-
"loss": 0.8542,
|
89 |
-
"step": 1200
|
90 |
-
},
|
91 |
-
{
|
92 |
-
"epoch": 4.41,
|
93 |
-
"eval_loss": 0.3784765601158142,
|
94 |
-
"eval_per": 0.24003603985584057,
|
95 |
-
"eval_runtime": 136.6045,
|
96 |
-
"eval_samples_per_second": 15.907,
|
97 |
-
"eval_steps_per_second": 1.991,
|
98 |
-
"step": 1200
|
99 |
-
}
|
100 |
-
],
|
101 |
-
"max_steps": 10880,
|
102 |
-
"num_train_epochs": 40,
|
103 |
-
"total_flos": 4.4309288969819863e+18,
|
104 |
-
"trial_name": null,
|
105 |
-
"trial_params": null
|
106 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wav2vecasr/model/checkpoint-1200/training_args.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:6d30a9efd4c6d95a24828b0e52d71ed7ad4f3c83075c158d1488ebe5f50b6719
|
3 |
-
size 3323
|
|
|
|
|
|
|
|
wav2vecasr/model/checkpoint-1200/vocab.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"K*": 0, "Z*": 1, "AE*": 2, "B": 3, "UH*": 4, "W": 5, "SIL": 6, "CH": 7, "EH*": 8, "T": 9, "D_": 10, "W*": 11, "K": 12, "spn": 13, "AH": 14, "AH*": 15, "NG": 16, "P*": 17, "B*": 18, "G": 19, "OY": 20, "D": 21, "ZH": 22, "sp": 23, "V": 24, "EY": 25, "V``": 26, "UW": 27, "s": 28, "P": 29, "UW*": 30, "ER*": 31, "sil": 32, "R*": 33, "IH": 34, "OW": 35, "HH*": 36, "Y": 37, "AO": 38, "AW*": 39, "ER": 40, "OW*": 41, "AY": 42, "M": 43, "T*": 44, "DH": 45, "AA*": 46, "L": 47, "AX": 48, "N*": 49, "EH": 50, "DH*": 51, "t": 52, "ERR": 53, "AO*": 54, "Z": 55, "S": 56, "ZH*": 57, "EY*": 58, "JH*": 59, "F": 60, "L*": 61, "Y*": 62, "R": 63, "G*": 64, "JH": 65, "W`": 66, "D*": 67, "AA": 68, "IY": 69, "AE": 70, "Ah": 71, "AW": 72, "SH": 73, "TH": 74, "N": 75, "V*": 76, "HH": 77, "UH": 78, "err": 79, "|": 80, "[UNK]": 81, "[PAD]": 82}
|
|
|
|