UniversalAlgorithmic committed on
Commit
0e17446
·
verified ·
1 Parent(s): 781a945
qa/README.md DELETED
@@ -1,55 +0,0 @@
1
- ---
2
- library_name: transformers
3
- license: apache-2.0
4
- base_model: google-bert/bert-base-uncased
5
- tags:
6
- - generated_from_trainer
7
- datasets:
8
- - squad
9
- model-index:
10
- - name: baseline
11
- results: []
12
- ---
13
-
14
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
- should probably proofread and complete it, then remove this comment. -->
16
-
17
- # baseline
18
-
19
- This model is a fine-tuned version of [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) on the squad dataset.
20
-
21
- ## Model description
22
-
23
- More information needed
24
-
25
- ## Intended uses & limitations
26
-
27
- More information needed
28
-
29
- ## Training and evaluation data
30
-
31
- More information needed
32
-
33
- ## Training procedure
34
-
35
- ### Training hyperparameters
36
-
37
- The following hyperparameters were used during training:
38
- - learning_rate: 3e-05
39
- - train_batch_size: 12
40
- - eval_batch_size: 8
41
- - seed: 42
42
- - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
43
- - lr_scheduler_type: linear
44
- - num_epochs: 2.0
45
-
46
- ### Training results
47
-
48
-
49
-
50
- ### Framework versions
51
-
52
- - Transformers 4.49.0
53
- - Pytorch 2.6.0+cu118
54
- - Datasets 3.3.1
55
- - Tokenizers 0.21.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
qa/all_results.json DELETED
@@ -1,15 +0,0 @@
1
- {
2
- "epoch": 2.0,
3
- "eval_exact_match": 81.49479659413434,
4
- "eval_f1": 88.62945564424126,
5
- "eval_runtime": 61.0301,
6
- "eval_samples": 10784,
7
- "eval_samples_per_second": 176.7,
8
- "eval_steps_per_second": 22.087,
9
- "total_flos": 3.541929151120589e+16,
10
- "train_loss": 1.148573803161563,
11
- "train_runtime": 3245.3985,
12
- "train_samples": 88524,
13
- "train_samples_per_second": 54.554,
14
- "train_steps_per_second": 4.546
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
qa/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "_name_or_path": "google-bert/bert-base-uncased",
3
- "architectures": [
4
- "BertForQuestionAnswering"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 3072,
14
- "layer_norm_eps": 1e-12,
15
- "max_position_embeddings": 512,
16
- "model_type": "bert",
17
- "num_attention_heads": 12,
18
- "num_hidden_layers": 12,
19
- "pad_token_id": 0,
20
- "position_embedding_type": "absolute",
21
- "torch_dtype": "float32",
22
- "transformers_version": "4.49.0",
23
- "type_vocab_size": 2,
24
- "use_cache": true,
25
- "vocab_size": 30522
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
qa/eval_nbest_predictions.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b8d44953cbe0ce20d1d1b62b72e7adba18bf1dc81d055492e22bfa21ff46657
3
- size 49596120
 
 
 
 
qa/eval_predictions.json DELETED
The diff for this file is too large to render. See raw diff
 
qa/eval_results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "epoch": 2.0,
3
- "eval_exact_match": 81.49479659413434,
4
- "eval_f1": 88.62945564424126,
5
- "eval_runtime": 61.0301,
6
- "eval_samples": 10784,
7
- "eval_samples_per_second": 176.7,
8
- "eval_steps_per_second": 22.087
9
- }
 
 
 
 
 
 
 
 
 
 
qa/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:38003bd65e4bfa70dd16886f29af7ab00d1aa0ae4de191b0a7de4d7883d17dde
3
- size 442683784
 
 
 
 
qa/runs/May15_03-24-14_cs-Precision-7960-Tower/events.out.tfevents.1747293859.cs-Precision-7960-Tower.147971.0 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:36bfca6273a2422943de7b634cf75efd69b8e92079abe84df9e9c9e026d497f6
3
- size 11535
 
 
 
 
qa/runs/May15_03-24-14_cs-Precision-7960-Tower/events.out.tfevents.1747297197.cs-Precision-7960-Tower.147971.1 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:259c79a03ba9c522b1fd728e92dae5cfc31c6cd73b2377d124749c83a0163910
3
- size 412
 
 
 
 
qa/special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
7
- }
 
 
 
 
 
 
 
 
qa/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
qa/tokenizer_config.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "[PAD]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "100": {
12
- "content": "[UNK]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "101": {
20
- "content": "[CLS]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "102": {
28
- "content": "[SEP]",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "103": {
36
- "content": "[MASK]",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- }
43
- },
44
- "clean_up_tokenization_spaces": false,
45
- "cls_token": "[CLS]",
46
- "do_lower_case": true,
47
- "extra_special_tokens": {},
48
- "mask_token": "[MASK]",
49
- "model_max_length": 512,
50
- "pad_token": "[PAD]",
51
- "sep_token": "[SEP]",
52
- "strip_accents": null,
53
- "tokenize_chinese_chars": true,
54
- "tokenizer_class": "BertTokenizer",
55
- "unk_token": "[UNK]"
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
qa/train_results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "epoch": 2.0,
3
- "total_flos": 3.541929151120589e+16,
4
- "train_loss": 1.148573803161563,
5
- "train_runtime": 3245.3985,
6
- "train_samples": 88524,
7
- "train_samples_per_second": 54.554,
8
- "train_steps_per_second": 4.546
9
- }
 
 
 
 
 
 
 
 
 
 
qa/trainer_state.json DELETED
@@ -1,245 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
- "eval_steps": 500,
6
- "global_step": 14754,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.06777822963264199,
13
- "grad_norm": 31.397275924682617,
14
- "learning_rate": 2.8983326555510372e-05,
15
- "loss": 2.7299,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 0.13555645926528398,
20
- "grad_norm": 25.8492431640625,
21
- "learning_rate": 2.796665311102074e-05,
22
- "loss": 1.752,
23
- "step": 1000
24
- },
25
- {
26
- "epoch": 0.203334688897926,
27
- "grad_norm": 29.627431869506836,
28
- "learning_rate": 2.694997966653111e-05,
29
- "loss": 1.5588,
30
- "step": 1500
31
- },
32
- {
33
- "epoch": 0.27111291853056796,
34
- "grad_norm": 21.147193908691406,
35
- "learning_rate": 2.593330622204148e-05,
36
- "loss": 1.5014,
37
- "step": 2000
38
- },
39
- {
40
- "epoch": 0.33889114816321,
41
- "grad_norm": 17.81966781616211,
42
- "learning_rate": 2.491663277755185e-05,
43
- "loss": 1.4768,
44
- "step": 2500
45
- },
46
- {
47
- "epoch": 0.406669377795852,
48
- "grad_norm": 20.26822853088379,
49
- "learning_rate": 2.389995933306222e-05,
50
- "loss": 1.4064,
51
- "step": 3000
52
- },
53
- {
54
- "epoch": 0.47444760742849396,
55
- "grad_norm": 16.216028213500977,
56
- "learning_rate": 2.288328588857259e-05,
57
- "loss": 1.3502,
58
- "step": 3500
59
- },
60
- {
61
- "epoch": 0.5422258370611359,
62
- "grad_norm": 17.930505752563477,
63
- "learning_rate": 2.1866612444082963e-05,
64
- "loss": 1.3101,
65
- "step": 4000
66
- },
67
- {
68
- "epoch": 0.6100040666937779,
69
- "grad_norm": 26.499574661254883,
70
- "learning_rate": 2.084993899959333e-05,
71
- "loss": 1.2922,
72
- "step": 4500
73
- },
74
- {
75
- "epoch": 0.67778229632642,
76
- "grad_norm": 26.83368492126465,
77
- "learning_rate": 1.9833265555103702e-05,
78
- "loss": 1.3053,
79
- "step": 5000
80
- },
81
- {
82
- "epoch": 0.745560525959062,
83
- "grad_norm": 22.85872459411621,
84
- "learning_rate": 1.8816592110614073e-05,
85
- "loss": 1.2555,
86
- "step": 5500
87
- },
88
- {
89
- "epoch": 0.813338755591704,
90
- "grad_norm": 23.48080825805664,
91
- "learning_rate": 1.779991866612444e-05,
92
- "loss": 1.2068,
93
- "step": 6000
94
- },
95
- {
96
- "epoch": 0.8811169852243459,
97
- "grad_norm": 20.919252395629883,
98
- "learning_rate": 1.6783245221634812e-05,
99
- "loss": 1.1991,
100
- "step": 6500
101
- },
102
- {
103
- "epoch": 0.9488952148569879,
104
- "grad_norm": 23.9005126953125,
105
- "learning_rate": 1.576657177714518e-05,
106
- "loss": 1.2156,
107
- "step": 7000
108
- },
109
- {
110
- "epoch": 1.01667344448963,
111
- "grad_norm": 22.660743713378906,
112
- "learning_rate": 1.4749898332655551e-05,
113
- "loss": 1.0827,
114
- "step": 7500
115
- },
116
- {
117
- "epoch": 1.0844516741222718,
118
- "grad_norm": 25.28419303894043,
119
- "learning_rate": 1.373322488816592e-05,
120
- "loss": 0.8481,
121
- "step": 8000
122
- },
123
- {
124
- "epoch": 1.152229903754914,
125
- "grad_norm": 14.510698318481445,
126
- "learning_rate": 1.271655144367629e-05,
127
- "loss": 0.872,
128
- "step": 8500
129
- },
130
- {
131
- "epoch": 1.2200081333875559,
132
- "grad_norm": 29.12289810180664,
133
- "learning_rate": 1.1699877999186661e-05,
134
- "loss": 0.8375,
135
- "step": 9000
136
- },
137
- {
138
- "epoch": 1.287786363020198,
139
- "grad_norm": 19.038454055786133,
140
- "learning_rate": 1.0683204554697033e-05,
141
- "loss": 0.8464,
142
- "step": 9500
143
- },
144
- {
145
- "epoch": 1.35556459265284,
146
- "grad_norm": 21.09101676940918,
147
- "learning_rate": 9.666531110207402e-06,
148
- "loss": 0.8746,
149
- "step": 10000
150
- },
151
- {
152
- "epoch": 1.4233428222854818,
153
- "grad_norm": 20.79250144958496,
154
- "learning_rate": 8.649857665717772e-06,
155
- "loss": 0.8776,
156
- "step": 10500
157
- },
158
- {
159
- "epoch": 1.491121051918124,
160
- "grad_norm": 21.217571258544922,
161
- "learning_rate": 7.633184221228141e-06,
162
- "loss": 0.8523,
163
- "step": 11000
164
- },
165
- {
166
- "epoch": 1.5588992815507658,
167
- "grad_norm": 15.557079315185547,
168
- "learning_rate": 6.616510776738511e-06,
169
- "loss": 0.8387,
170
- "step": 11500
171
- },
172
- {
173
- "epoch": 1.626677511183408,
174
- "grad_norm": 14.53345012664795,
175
- "learning_rate": 5.5998373322488825e-06,
176
- "loss": 0.8377,
177
- "step": 12000
178
- },
179
- {
180
- "epoch": 1.6944557408160499,
181
- "grad_norm": 26.921611785888672,
182
- "learning_rate": 4.583163887759252e-06,
183
- "loss": 0.8449,
184
- "step": 12500
185
- },
186
- {
187
- "epoch": 1.7622339704486918,
188
- "grad_norm": 12.789366722106934,
189
- "learning_rate": 3.566490443269622e-06,
190
- "loss": 0.8547,
191
- "step": 13000
192
- },
193
- {
194
- "epoch": 1.830012200081334,
195
- "grad_norm": 37.19759750366211,
196
- "learning_rate": 2.549816998779992e-06,
197
- "loss": 0.818,
198
- "step": 13500
199
- },
200
- {
201
- "epoch": 1.8977904297139758,
202
- "grad_norm": 14.62682819366455,
203
- "learning_rate": 1.533143554290362e-06,
204
- "loss": 0.8128,
205
- "step": 14000
206
- },
207
- {
208
- "epoch": 1.965568659346618,
209
- "grad_norm": 21.051790237426758,
210
- "learning_rate": 5.164701098007319e-07,
211
- "loss": 0.8115,
212
- "step": 14500
213
- },
214
- {
215
- "epoch": 2.0,
216
- "step": 14754,
217
- "total_flos": 3.541929151120589e+16,
218
- "train_loss": 1.148573803161563,
219
- "train_runtime": 3245.3985,
220
- "train_samples_per_second": 54.554,
221
- "train_steps_per_second": 4.546
222
- }
223
- ],
224
- "logging_steps": 500,
225
- "max_steps": 14754,
226
- "num_input_tokens_seen": 0,
227
- "num_train_epochs": 2,
228
- "save_steps": 500,
229
- "stateful_callbacks": {
230
- "TrainerControl": {
231
- "args": {
232
- "should_epoch_stop": false,
233
- "should_evaluate": false,
234
- "should_log": false,
235
- "should_save": true,
236
- "should_training_stop": true
237
- },
238
- "attributes": {}
239
- }
240
- },
241
- "total_flos": 3.541929151120589e+16,
242
- "train_batch_size": 12,
243
- "trial_name": null,
244
- "trial_params": null
245
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
qa/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe8e61ba1ca1cb106ca9adca5e9262fa9a262238814728a69256855c78c32f51
3
- size 5304
 
 
 
 
qa/vocab.txt DELETED
The diff for this file is too large to render. See raw diff